diff --git a/README.md b/README.md index d615c23a2..386045e25 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ oneMKL is part of the [UXL Foundation](http://www.uxlfoundation.org). portBLAS - x86 CPU, Intel GPU, NVIDIA GPU, AMD GPU + x86 CPU, Intel GPU, NVIDIA GPU, AMD GPU, Other SYCL devices (unsupported) portFFT @@ -172,7 +172,7 @@ Supported compilers include: - BLAS + BLAS x86 CPU Intel(R) oneMKL Intel DPC++
AdaptiveCpp @@ -221,6 +221,12 @@ Supported compilers include: Open DPC++ Dynamic, Static + + Other SYCL devices (unsupported) + portBLAS + Intel DPC++
Open DPC++ + Dynamic, Static + LAPACK x86 CPU @@ -405,6 +411,7 @@ Supported compilers include: - Intel(R) Data Center GPU Max Series - NVIDIA(R) A100 (Linux* only) - AMD(R) GPUs see [here](https://github.com/RadeonOpenCompute/ROCm#hardware-and-software-support) tested on AMD Vega 20 (gfx906) + - Other SYCL devices can be used, but are not supported --- ### Supported Operating Systems diff --git a/docs/building_the_project_with_dpcpp.rst b/docs/building_the_project_with_dpcpp.rst index e33a78046..2fea9395f 100644 --- a/docs/building_the_project_with_dpcpp.rst +++ b/docs/building_the_project_with_dpcpp.rst @@ -225,6 +225,21 @@ A few often-used architectures are listed below: For a host with ROCm installed, the device architecture can be retrieved via the ``rocminfo`` tool. The architecture will be displayed in the ``Name:`` row. +.. _build_for_other_SYCL_devices: + +Building for other SYCL devices +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +SYCL enables portable heterogeneous computing on a wide range of accelerators. +Consequently, it is possible to use oneMKL Interfaces with accelerators not +anticipated by the oneMKL Interfaces team. + +For generic SYCL devices, only the portBLAS backend is enabled. The user must +set the appropriate ``-fsycl-targets`` for their device, and also any +``PORTBLAS_TUNING_TARGET`` required for performance. See +`Building for portBLAS`_. Extensive testing is strongly advised for these +unsupported configurations. + .. _build_for_portlibs_dpcpp: Pure SYCL backends: portBLAS and portFFT @@ -408,6 +423,21 @@ set, the backend libraries to enable the use of BLAS, LAPACK and RNG with MKLGPU and MKLCPU would also be enabled. The build of examples is disabled. Since functional testing was not disabled, tests would be built. +Build oneMKL for the BLAS domain on a generic SYCL device: + +.. code-block:: bash + + cmake $ONEMKL_DIR \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_C_COMPILER=clang \ + -DENABLE_MKLCPU_BACKEND=False \ + -DENABLE_MKLGPU_BACKEND=False \ + -DENABLE_PORTBLAS_BACKEND=True + +Note that this is not a tested configuration. This builds oneMKL Interfaces +with the portBLAS backend only, for a generic SYCL device supported by the +Open DPC++ project. + .. _project_cleanup: Project Cleanup diff --git a/include/oneapi/mkl/detail/backends_table.hpp b/include/oneapi/mkl/detail/backends_table.hpp index 8e68674cc..b385b21b0 100644 --- a/include/oneapi/mkl/detail/backends_table.hpp +++ b/include/oneapi/mkl/detail/backends_table.hpp @@ -40,7 +40,7 @@ namespace oneapi { namespace mkl { -enum class device : uint16_t { x86cpu, intelgpu, nvidiagpu, amdgpu }; +enum class device : uint16_t { x86cpu, intelgpu, nvidiagpu, amdgpu, generic_device }; enum class domain : uint16_t { blas, dft, lapack, rng, sparse_blas }; static std::map>> libraries = { @@ -82,6 +82,12 @@ static std::map>> libraries = #endif #ifdef ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU LIB_NAME("blas_portblas"), +#endif + } }, + { device::generic_device, + { +#ifdef ENABLE_PORTBLAS_BACKEND + LIB_NAME("blas_portblas"), #endif } } } }, diff --git a/include/oneapi/mkl/detail/get_device_id.hpp b/include/oneapi/mkl/detail/get_device_id.hpp index 88b235754..fbfe64219 100644 --- a/include/oneapi/mkl/detail/get_device_id.hpp +++ b/include/oneapi/mkl/detail/get_device_id.hpp @@ -59,11 +59,11 @@ inline oneapi::mkl::device get_device_id(sycl::queue &queue) { else if (vendor_id == AMD_ID) device_id = device::amdgpu; else { - throw unsupported_device("", "", queue.get_device()); + device_id = device::generic_device; } } else { - throw unsupported_device("", "", queue.get_device()); + device_id = device::generic_device; } return device_id; } diff --git a/src/blas/blas_loader.cpp b/src/blas/blas_loader.cpp index c1f1339c6..1c3cfcb71 100644 --- a/src/blas/blas_loader.cpp +++ b/src/blas/blas_loader.cpp @@ -35,413 +35,418 @@ static oneapi::mkl::detail::table_initializer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_scasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_scasum_sycl(queue, n, x, incx, result); } void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_dzasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_dzasum_sycl(queue, n, x, incx, result); } void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_sasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_sasum_sycl(queue, n, x, incx, result); } void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_dasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_dasum_sycl(queue, n, x, incx, result); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_saxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_saxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_daxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_daxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_caxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_caxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zaxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_zaxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_saxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_saxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_daxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_daxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_caxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_caxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_zaxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_zaxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_saxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_saxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_daxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_daxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_caxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_caxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zaxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_zaxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_scopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_scopy_sycl(queue, n, x, incx, y, incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_dcopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_dcopy_sycl(queue, n, x, incx, y, incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_ccopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_ccopy_sycl(queue, n, x, incx, y, incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zcopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_zcopy_sycl(queue, n, x, incx, y, incy); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_scopy_batch_strided_sycl(queue, n, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_scopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_dcopy_batch_strided_sycl(queue, n, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_dcopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_ccopy_batch_strided_sycl(queue, n, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_ccopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_zcopy_batch_strided_sycl(queue, n, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_zcopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].column_major_sdot_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_sdot_sycl(queue, n, x, incx, y, incy, result); } void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].column_major_ddot_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_ddot_sycl(queue, n, x, incx, y, incy, result); } void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].column_major_dsdot_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_dsdot_sycl(queue, n, x, incx, y, incy, result); } void dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].column_major_cdotc_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_cdotc_sycl(queue, n, x, incx, y, incy, result); } void dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].column_major_zdotc_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_zdotc_sycl(queue, n, x, incx, y, incy, result); } void dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].column_major_cdotu_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_cdotu_sycl(queue, n, x, incx, y, incy, result); } void dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].column_major_zdotu_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_zdotu_sycl(queue, n, x, incx, y, incy, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_isamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_isamin_sycl(queue, n, x, incx, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_idamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_idamin_sycl(queue, n, x, incx, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_icamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_icamin_sycl(queue, n, x, incx, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_izamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_izamin_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_isamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_isamax_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_idamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_idamax_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_icamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_icamax_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_izamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_izamax_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_scnrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_scnrm2_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_dznrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_dznrm2_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_snrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_snrm2_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_dnrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_dnrm2_sycl(queue, n, x, incx, result); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { - function_tables[libkey].column_major_srot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].column_major_srot_sycl(queue, n, x, incx, y, incy, c, s); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { - function_tables[libkey].column_major_drot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].column_major_drot_sycl(queue, n, x, incx, y, incy, c, s); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { - function_tables[libkey].column_major_csrot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].column_major_csrot_sycl(queue, n, x, incx, y, incy, c, s); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { - function_tables[libkey].column_major_zdrot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].column_major_zdrot_sycl(queue, n, x, incx, y, incy, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { - function_tables[libkey].column_major_srotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].column_major_srotg_sycl(queue, a, b, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { - function_tables[libkey].column_major_drotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].column_major_drotg_sycl(queue, a, b, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, sycl::buffer, 1> &b, sycl::buffer &c, sycl::buffer, 1> &s) { - function_tables[libkey].column_major_crotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].column_major_crotg_sycl(queue, a, b, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, sycl::buffer, 1> &b, sycl::buffer &c, sycl::buffer, 1> &s) { - function_tables[libkey].column_major_zrotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].column_major_zrotg_sycl(queue, a, b, c, s); } void rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m) { - function_tables[libkey].column_major_srotm_sycl(queue, n, x, incx, y, incy, param); + function_tables[{ libkey, queue }].column_major_srotm_sycl(queue, n, x, incx, y, incy, param); } void rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m) { - function_tables[libkey].column_major_drotm_sycl(queue, n, x, incx, y, incy, param); + function_tables[{ libkey, queue }].column_major_drotm_sycl(queue, n, x, incx, y, incy, param); } void rotmg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, sycl::buffer &x1, float y1, sycl::buffer ¶m) { - function_tables[libkey].column_major_srotmg_sycl(queue, d1, d2, x1, y1, param); + function_tables[{ libkey, queue }].column_major_srotmg_sycl(queue, d1, d2, x1, y1, param); } void rotmg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, sycl::buffer &x1, double y1, sycl::buffer ¶m) { - function_tables[libkey].column_major_drotmg_sycl(queue, d1, d2, x1, y1, param); + function_tables[{ libkey, queue }].column_major_drotmg_sycl(queue, d1, d2, x1, y1, param); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_sscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].column_major_sscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_dscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].column_major_dscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_cscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].column_major_cscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_csscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].column_major_csscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_zscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].column_major_zscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_zdscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].column_major_zdscal_sycl(queue, n, alpha, x, incx); } void sdsdot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float sb, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].column_major_sdsdot_sycl(queue, n, sb, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_sdsdot_sycl(queue, n, sb, x, incx, y, incy, + result); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_sswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_sswap_sycl(queue, n, x, incx, y, incy); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_dswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_dswap_sycl(queue, n, x, incx, y, incy); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_cswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_cswap_sycl(queue, n, x, incx, y, incy); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_zswap_sycl(queue, n, x, incx, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_sgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_sgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_dgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_dgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -449,8 +454,8 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std:: sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_cgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_cgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -458,40 +463,40 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std:: sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_zgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_sgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_sgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_dgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_dgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_cgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_cgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_zgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -499,9 +504,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t stridea, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_sgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_sgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -509,9 +514,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t stridea, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, double beta, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_dgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_dgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -520,9 +525,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t incx, std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_cgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_cgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -531,9 +536,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_zgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_zgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t m, @@ -541,7 +546,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].column_major_sdgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_sdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } @@ -550,7 +555,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].column_major_ddgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_ddgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } @@ -559,7 +564,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].column_major_cdgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_cdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } @@ -568,430 +573,444 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].column_major_zdgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_zdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].column_major_sger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].column_major_sger_sycl(queue, m, n, alpha, x, incx, y, incy, + a, lda); } void ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].column_major_dger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].column_major_dger_sycl(queue, m, n, alpha, x, incx, y, incy, + a, lda); } void gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_cgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].column_major_cgerc_sycl(queue, m, n, alpha, x, incx, y, incy, + a, lda); } void gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_zgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].column_major_zgerc_sycl(queue, m, n, alpha, x, incx, y, incy, + a, lda); } void geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_cgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].column_major_cgeru_sycl(queue, m, n, alpha, x, incx, y, incy, + a, lda); } void geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_zgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].column_major_zgeru_sycl(queue, m, n, alpha, x, incx, y, incy, + a, lda); } void hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_chbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_chbmv_sycl(queue, upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy); } void hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zhbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_zhbmv_sycl(queue, upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy); } void hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_chemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_chemv_sycl(queue, upper_lower, n, alpha, a, lda, + x, incx, beta, y, incy); } void hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zhemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_zhemv_sycl(queue, upper_lower, n, alpha, a, lda, + x, incx, beta, y, incy); } void her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_cher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].column_major_cher_sycl(queue, upper_lower, n, alpha, x, incx, + a, lda); } void her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_zher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].column_major_zher_sycl(queue, upper_lower, n, alpha, x, incx, + a, lda); } void her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_cher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a, lda); + function_tables[{ libkey, queue }].column_major_cher2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a, lda); } void her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_zher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a, lda); + function_tables[{ libkey, queue }].column_major_zher2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a, lda); } void hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_chpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].column_major_chpmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zhpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].column_major_zhpmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a) { - function_tables[libkey].column_major_chpr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].column_major_chpr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a) { - function_tables[libkey].column_major_zhpr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].column_major_zhpr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a) { - function_tables[libkey].column_major_chpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a); + function_tables[{ libkey, queue }].column_major_chpr2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a); } void hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a) { - function_tables[libkey].column_major_zhpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a); + function_tables[{ libkey, queue }].column_major_zhpr2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a); } void sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_ssbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_ssbmv_sycl(queue, upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy); } void sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_dsbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_dsbmv_sycl(queue, upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy); } void spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_sspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].column_major_sspmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_dspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].column_major_dspmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { - function_tables[libkey].column_major_sspr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].column_major_sspr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { - function_tables[libkey].column_major_dspr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].column_major_dspr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { - function_tables[libkey].column_major_sspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a); + function_tables[{ libkey, queue }].column_major_sspr2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a); } void spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { - function_tables[libkey].column_major_dspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a); + function_tables[{ libkey, queue }].column_major_dspr2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a); } void symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_ssymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_ssymv_sycl(queue, upper_lower, n, alpha, a, lda, + x, incx, beta, y, incy); } void symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_dsymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_dsymv_sycl(queue, upper_lower, n, alpha, a, lda, + x, incx, beta, y, incy); } void syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].column_major_ssyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].column_major_ssyr_sycl(queue, upper_lower, n, alpha, x, incx, + a, lda); } void syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].column_major_dsyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].column_major_dsyr_sycl(queue, upper_lower, n, alpha, x, incx, + a, lda); } void syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].column_major_ssyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a, lda); + function_tables[{ libkey, queue }].column_major_ssyr2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a, lda); } void syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].column_major_dsyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a, lda); + function_tables[{ libkey, queue }].column_major_dsyr2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a, lda); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_stbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_stbmv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_dtbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_dtbmv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ctbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_ctbmv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ztbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_ztbmv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_stbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_stbsv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_dtbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_dtbsv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ctbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_ctbsv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ztbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_ztbsv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_stpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_stpmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_dtpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_dtpmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ctpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_ctpmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ztpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_ztpmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_stpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_stpsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_dtpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_dtpsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ctpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_ctpsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ztpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_ztpsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_strmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_strmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_dtrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_dtrmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ctrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_ctrmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ztrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_ztrmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_strsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_strsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_dtrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_dtrsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ctrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_ctrsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ztrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_ztrsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_sgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_sgemm_sycl(queue, transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_dgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_dgemm_sycl(queue, transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -999,8 +1018,8 @@ void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, tran sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_cgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_cgemm_sycl(queue, transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -1008,32 +1027,32 @@ void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, tran sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zgemm_sycl(queue, transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_hgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_hgemm_sycl(queue, transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_gemm_f16f16f32_sycl(queue, transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_sycl( + queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_gemm_bf16bf16f32_sycl(queue, transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_gemm_bf16bf16f32_sycl( + queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -1041,8 +1060,8 @@ void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_chemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_chemm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -1050,23 +1069,23 @@ void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zhemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zhemm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, std::int64_t lda, float beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_cherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].column_major_cherk_sycl(queue, upper_lower, trans, n, k, + alpha, a, lda, beta, c, ldc); } void herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1> &a, std::int64_t lda, double beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zherk_sycl(queue, upper_lower, trans, n, k, + alpha, a, lda, beta, c, ldc); } void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -1074,8 +1093,8 @@ void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, float beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_cher2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_cher2k_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -1083,24 +1102,24 @@ void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, double beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zher2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zher2k_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_ssymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_ssymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_dsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_dsymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -1108,8 +1127,8 @@ void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_csymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_csymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -1117,56 +1136,56 @@ void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zsymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_ssyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].column_major_ssyrk_sycl(queue, upper_lower, trans, n, k, + alpha, a, lda, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_dsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].column_major_dsyrk_sycl(queue, upper_lower, trans, n, k, + alpha, a, lda, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_csyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].column_major_csyrk_sycl(queue, upper_lower, trans, n, k, + alpha, a, lda, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zsyrk_sycl(queue, upper_lower, trans, n, k, + alpha, a, lda, beta, c, ldc); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_ssyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].column_major_ssyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_dsyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].column_major_dsyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -1174,9 +1193,9 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_csyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].column_major_csyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -1184,25 +1203,25 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_zsyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].column_major_zsyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_ssyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_ssyr2k_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_dsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_dsyr2k_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -1210,8 +1229,8 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_csyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_csyr2k_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -1219,72 +1238,72 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zsyr2k_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].column_major_strmm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_strmm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].column_major_dtrmm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_dtrmm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].column_major_ctrmm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_ctrmm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].column_major_ztrmm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_ztrmm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].column_major_strsm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_strsm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].column_major_dtrsm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_dtrsm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].column_major_ctrsm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_ctrsm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].column_major_ztrsm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_ztrsm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -1293,7 +1312,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_sgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_sgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1304,7 +1323,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, double beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_dgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_dgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1315,7 +1334,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_cgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_cgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1326,7 +1345,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_zgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_zgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1337,7 +1356,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_hgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_hgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1348,7 +1367,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_gemm_f16f16f32_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1359,7 +1378,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_gemm_s8s8f32_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_gemm_s8s8f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1370,7 +1389,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_gemm_s8s8s32_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1380,7 +1399,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_strsm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_strsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -1390,7 +1409,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_dtrsm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_dtrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -1400,7 +1419,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_ctrsm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_ctrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -1410,7 +1429,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_ztrsm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_ztrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -1419,16 +1438,16 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_sgemmt_sycl(queue, upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_sgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_dgemmt_sycl(queue, upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_dgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, @@ -1436,8 +1455,8 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_cgemmt_sycl(queue, upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_cgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, @@ -1445,8 +1464,8 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zgemmt_sycl(queue, upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -1454,7 +1473,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].column_major_gemm_s8u8s32_bias_sycl( + function_tables[{ libkey, queue }].column_major_gemm_s8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -1463,7 +1482,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].column_major_gemm_s8s8s32_bias_sycl( + function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -1472,7 +1491,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].column_major_gemm_u8s8s32_bias_sycl( + function_tables[{ libkey, queue }].column_major_gemm_u8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -1481,7 +1500,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].column_major_gemm_u8u8s32_bias_sycl( + function_tables[{ libkey, queue }].column_major_gemm_u8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -1489,7 +1508,7 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_somatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_somatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -1497,7 +1516,7 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_domatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_domatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -1506,7 +1525,7 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_comatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_comatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -1515,38 +1534,38 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_zomatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_zomatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].column_major_simatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].column_major_simatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].column_major_dimatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].column_major_dimatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].column_major_cimatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].column_major_cimatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].column_major_zimatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].column_major_zimatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, @@ -1555,7 +1574,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_somatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_somatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -1566,7 +1585,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_domatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_domatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -1577,7 +1596,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_comatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_comatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -1589,7 +1608,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_zomatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_zomatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -1597,97 +1616,105 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].column_major_somatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_somatcopy_sycl(queue, trans, m, n, alpha, a, + lda, b, ldb); } void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].column_major_domatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_domatcopy_sycl(queue, trans, m, n, alpha, a, + lda, b, ldb); } void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].column_major_comatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_comatcopy_sycl(queue, trans, m, n, alpha, a, + lda, b, ldb); } void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].column_major_zomatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_zomatcopy_sycl(queue, trans, m, n, alpha, a, + lda, b, ldb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].column_major_somatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, - b, ldb, strideb); + function_tables[{ libkey, queue }].column_major_somatcopy2_sycl(queue, trans, m, n, alpha, a, + lda, stridea, b, ldb, strideb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].column_major_domatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, - b, ldb, strideb); + function_tables[{ libkey, queue }].column_major_domatcopy2_sycl(queue, trans, m, n, alpha, a, + lda, stridea, b, ldb, strideb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].column_major_comatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, - b, ldb, strideb); + function_tables[{ libkey, queue }].column_major_comatcopy2_sycl(queue, trans, m, n, alpha, a, + lda, stridea, b, ldb, strideb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].column_major_zomatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, - b, ldb, strideb); + function_tables[{ libkey, queue }].column_major_zomatcopy2_sycl(queue, trans, m, n, alpha, a, + lda, stridea, b, ldb, strideb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].column_major_simatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].column_major_simatcopy_sycl(queue, trans, m, n, alpha, ab, + lda, ldb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].column_major_dimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].column_major_dimatcopy_sycl(queue, trans, m, n, alpha, ab, + lda, ldb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].column_major_cimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].column_major_cimatcopy_sycl(queue, trans, m, n, alpha, ab, + lda, ldb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].column_major_zimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].column_major_zimatcopy_sycl(queue, trans, m, n, alpha, ab, + lda, ldb); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_somatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].column_major_somatadd_sycl( + queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_domatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].column_major_domatadd_sycl( + queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -1695,8 +1722,8 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, t sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_comatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].column_major_comatadd_sycl( + queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -1704,8 +1731,8 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, t sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zomatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].column_major_zomatadd_sycl( + queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } // USM APIs @@ -1713,64 +1740,64 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, t sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_scasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_scasum_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_dzasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_dzasum_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_sasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_sasum_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_dasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_dasum_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, const float *x, std::int64_t incx, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_saxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_saxpy_usm_sycl(queue, n, alpha, x, incx, + y, incy, dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, const double *x, std::int64_t incx, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_daxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_daxpy_usm_sycl(queue, n, alpha, x, incx, + y, incy, dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_caxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_caxpy_usm_sycl(queue, n, alpha, x, incx, + y, incy, dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zaxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_zaxpy_usm_sycl(queue, n, alpha, x, incx, + y, incy, dependencies); } sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, float *alpha, const float **x, std::int64_t *incx, float **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_saxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_saxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1778,7 +1805,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 double *alpha, const double **x, std::int64_t *incx, double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_daxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_daxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1787,7 +1814,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_caxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_caxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1796,7 +1823,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zaxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zaxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1804,7 +1831,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const float *x, std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_saxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_saxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -1812,7 +1839,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const double *x, std::int64_t incx, std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_daxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_daxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -1821,7 +1848,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_caxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_caxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -1830,73 +1857,73 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zaxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zaxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_saxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].column_major_saxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_daxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].column_major_daxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_caxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].column_major_caxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zaxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].column_major_zaxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_scopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_scopy_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_dcopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_dcopy_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_ccopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_ccopy_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zcopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_zcopy_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, const float **x, std::int64_t *incx, float **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_scopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_scopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1904,7 +1931,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const double **x, std::int64_t *incx, double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1912,7 +1939,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex **x, std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ccopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ccopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1920,7 +1947,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex **x, std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1928,7 +1955,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const float *x, std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_scopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_scopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -1936,7 +1963,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const double *x, std::int64_t incx, std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -1944,7 +1971,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex *x, std::int64_t incx, std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ccopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ccopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -1952,303 +1979,307 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex *x, std::int64_t incx, std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_sdot_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_sdot_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_ddot_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_ddot_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, const float *y, std::int64_t incy, double *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsdot_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_dsdot_usm_sycl( + queue, n, x, incx, y, incy, result, dependencies); } sycl::event dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_cdotc_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_cdotc_usm_sycl( + queue, n, x, incx, y, incy, result, dependencies); } sycl::event dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_zdotc_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_zdotc_usm_sycl( + queue, n, x, incx, y, incy, result, dependencies); } sycl::event dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_cdotu_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_cdotu_usm_sycl( + queue, n, x, incx, y, incy, result, dependencies); } sycl::event dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_zdotu_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_zdotu_usm_sycl( + queue, n, x, incx, y, incy, result, dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_isamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_isamin_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_idamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_idamin_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_icamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_icamin_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_izamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_izamin_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_isamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_isamax_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_idamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_idamax_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_icamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_icamax_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_izamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_izamax_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_scnrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_scnrm2_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_dznrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_dznrm2_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_snrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_snrm2_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_dnrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_dnrm2_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, float c, float s, const std::vector &dependencies) { - return function_tables[libkey].column_major_srot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue }].column_major_srot_usm_sycl(queue, n, x, incx, y, incy, + c, s, dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, double c, double s, const std::vector &dependencies) { - return function_tables[libkey].column_major_drot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue }].column_major_drot_usm_sycl(queue, n, x, incx, y, incy, + c, s, dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, std::int64_t incy, float c, float s, const std::vector &dependencies) { - return function_tables[libkey].column_major_csrot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue }].column_major_csrot_usm_sycl(queue, n, x, incx, y, + incy, c, s, dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, std::int64_t incy, double c, double s, const std::vector &dependencies) { - return function_tables[libkey].column_major_zdrot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue }].column_major_zdrot_usm_sycl(queue, n, x, incx, y, + incy, c, s, dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, float *a, float *b, float *c, float *s, const std::vector &dependencies) { - return function_tables[libkey].column_major_srotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].column_major_srotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, double *a, double *b, double *c, double *s, const std::vector &dependencies) { - return function_tables[libkey].column_major_drotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].column_major_drotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, std::complex *a, std::complex *b, float *c, std::complex *s, const std::vector &dependencies) { - return function_tables[libkey].column_major_crotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].column_major_crotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, std::complex *a, std::complex *b, double *c, std::complex *s, const std::vector &dependencies) { - return function_tables[libkey].column_major_zrotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].column_major_zrotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, std::int64_t incy, float *param, const std::vector &dependencies) { - return function_tables[libkey].column_major_srotm_usm_sycl(queue, n, x, incx, y, incy, param, - dependencies); + return function_tables[{ libkey, queue }].column_major_srotm_usm_sycl( + queue, n, x, incx, y, incy, param, dependencies); } sycl::event rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, std::int64_t incy, double *param, const std::vector &dependencies) { - return function_tables[libkey].column_major_drotm_usm_sycl(queue, n, x, incx, y, incy, param, - dependencies); + return function_tables[{ libkey, queue }].column_major_drotm_usm_sycl( + queue, n, x, incx, y, incy, param, dependencies); } sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue &queue, float *d1, float *d2, float *x1, float y1, float *param, const std::vector &dependencies) { - return function_tables[libkey].column_major_srotmg_usm_sycl(queue, d1, d2, x1, y1, param, - dependencies); + return function_tables[{ libkey, queue }].column_major_srotmg_usm_sycl(queue, d1, d2, x1, y1, + param, dependencies); } sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue &queue, double *d1, double *d2, double *x1, double y1, double *param, const std::vector &dependencies) { - return function_tables[libkey].column_major_drotmg_usm_sycl(queue, d1, d2, x1, y1, param, - dependencies); + return function_tables[{ libkey, queue }].column_major_drotmg_usm_sycl(queue, d1, d2, x1, y1, + param, dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_sscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].column_major_sscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_dscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].column_major_dscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_cscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].column_major_cscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_csscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].column_major_csscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_zscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].column_major_zscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_zdscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].column_major_zdscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event sdsdot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float sb, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_sdsdot_usm_sycl(queue, n, sb, x, incx, y, incy, - result, dependencies); + return function_tables[{ libkey, queue }].column_major_sdsdot_usm_sycl( + queue, n, sb, x, incx, y, incy, result, dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_sswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_sswap_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_dswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_dswap_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_cswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_cswap_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_zswap_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2256,7 +2287,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_dgbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2265,7 +2296,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2274,7 +2305,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2282,16 +2313,16 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_sgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_dgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_dgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -2299,8 +2330,8 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_cgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -2308,8 +2339,8 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_zgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, @@ -2318,7 +2349,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stridex, float beta, float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -2329,7 +2360,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stridex, double beta, double *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -2341,7 +2372,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -2353,7 +2384,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -2363,7 +2394,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, float **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -2373,7 +2404,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -2384,7 +2415,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex **x, std::int64_t *incx, std::complex *beta, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -2396,7 +2427,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex *beta, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -2406,7 +2437,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t stridea, const float *x, std::int64_t incx, std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_sdgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -2416,7 +2447,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t stridea, const double *x, std::int64_t incx, std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ddgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ddgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -2427,7 +2458,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t incx, std::int64_t stridex, std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_cdgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -2438,7 +2469,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t incx, std::int64_t stridex, std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zdgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -2448,7 +2479,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_sdgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } @@ -2457,7 +2488,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ddgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ddgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } @@ -2466,7 +2497,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, const std::complex **x, std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_cdgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } @@ -2475,55 +2506,55 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, const std::complex **x, std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zdgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } sycl::event ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_sger_usm_sycl(queue, m, n, alpha, x, incx, y, incy, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_sger_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_dger_usm_sycl(queue, m, n, alpha, x, incx, y, incy, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_dger_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgerc_usm_sycl(queue, m, n, alpha, x, incx, y, incy, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_cgerc_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgerc_usm_sycl(queue, m, n, alpha, x, incx, y, incy, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_zgerc_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgeru_usm_sycl(queue, m, n, alpha, x, incx, y, incy, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_cgeru_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgeru_usm_sycl(queue, m, n, alpha, x, incx, y, incy, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_zgeru_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -2531,7 +2562,7 @@ sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_chbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_chbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2540,7 +2571,7 @@ sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zhbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zhbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2549,7 +2580,7 @@ sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_chemv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_chemv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2558,7 +2589,7 @@ sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zhemv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zhemv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2566,32 +2597,32 @@ sycl::event her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower float alpha, const std::complex *x, std::int64_t incx, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_cher_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_cher_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const std::complex *x, std::int64_t incx, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_zher_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_zher_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_cher2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_cher2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_zher2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_zher2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -2599,8 +2630,8 @@ sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_chpmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_chpmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -2608,45 +2639,45 @@ sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zhpmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_zhpmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const std::complex *x, std::int64_t incx, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_chpr_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, dependencies); + return function_tables[{ libkey, queue }].column_major_chpr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, dependencies); } sycl::event hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const std::complex *x, std::int64_t incx, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_zhpr_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, dependencies); + return function_tables[{ libkey, queue }].column_major_zhpr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, dependencies); } sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_chpr2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, dependencies); + return function_tables[{ libkey, queue }].column_major_chpr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_zhpr2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, dependencies); + return function_tables[{ libkey, queue }].column_major_zhpr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ssbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2654,57 +2685,57 @@ sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dsbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *a, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_sspmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_sspmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *a, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_dspmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_dspmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, float *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_sspr_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, dependencies); + return function_tables[{ libkey, queue }].column_major_sspr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, dependencies); } sycl::event spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, double *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_dspr_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, dependencies); + return function_tables[{ libkey, queue }].column_major_dspr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, dependencies); } sycl::event spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_sspr2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, dependencies); + return function_tables[{ libkey, queue }].column_major_sspr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } sycl::event spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_dspr2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, dependencies); + return function_tables[{ libkey, queue }].column_major_dspr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } sycl::event symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssymv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ssymv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2712,224 +2743,224 @@ sycl::event symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsymv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dsymv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssyr_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_ssyr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsyr_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_dsyr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssyr2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_ssyr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsyr2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_dsyr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_stbmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_stbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtbmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_dtbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctbmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ctbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztbmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ztbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_stbsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_stbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtbsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_dtbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctbsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ctbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztbsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ztbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_stpmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_stpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtpmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_dtpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctpmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ctpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztpmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ztpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_stpsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_stpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtpsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_dtpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctpsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ctpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztpsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ztpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_strmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_strmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtrmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_dtrmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctrmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ctrmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztrmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ztrmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_strsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_strsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtrsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_dtrsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctrsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ctrsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztrsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ztrsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgemm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -2937,7 +2968,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_dgemm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -2946,7 +2977,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgemm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -2955,7 +2986,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgemm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -2964,7 +2995,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, sycl::half beta, sycl::half *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_hgemm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_hgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -2972,7 +3003,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_f16f16f32_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -2980,7 +3011,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_bf16bf16f32_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_bf16bf16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -2989,7 +3020,7 @@ sycl::event hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_chemm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_chemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -2998,7 +3029,7 @@ sycl::event hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zhemm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zhemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3006,7 +3037,7 @@ sycl::event herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t n, std::int64_t k, float alpha, const std::complex *a, std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_cherk_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cherk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } @@ -3014,7 +3045,7 @@ sycl::event herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t n, std::int64_t k, double alpha, const std::complex *a, std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zherk_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zherk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } @@ -3023,7 +3054,7 @@ sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_cher2k_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3032,7 +3063,7 @@ sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, double beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zher2k_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3040,7 +3071,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssymm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ssymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3048,7 +3079,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsymm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3057,7 +3088,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_csymm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_csymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3066,7 +3097,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zsymm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3074,7 +3105,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssyrk_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ssyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } @@ -3082,7 +3113,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsyrk_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dsyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } @@ -3091,7 +3122,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *a, std::int64_t lda, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_csyrk_usm_sycl( + return function_tables[{ libkey, queue }].column_major_csyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } @@ -3100,7 +3131,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *a, std::int64_t lda, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zsyrk_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zsyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } @@ -3109,7 +3140,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp const float **a, std::int64_t *lda, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ssyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -3119,7 +3150,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp const double **a, std::int64_t *lda, double *beta, double **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -3130,7 +3161,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_csyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_csyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -3141,7 +3172,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp std::int64_t *lda, std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zsyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -3151,7 +3182,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe std::int64_t lda, std::int64_t stride_a, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ssyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3161,7 +3192,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe const double *a, std::int64_t lda, std::int64_t stride_a, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3172,7 +3203,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_csyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_csyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3183,7 +3214,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zsyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3192,7 +3223,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssyr2k_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ssyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3200,7 +3231,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsyr2k_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3209,7 +3240,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_csyr2k_usm_sycl( + return function_tables[{ libkey, queue }].column_major_csyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3218,7 +3249,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zsyr2k_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3226,18 +3257,18 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_strmm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_strmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtrmm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_dtrmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -3245,9 +3276,9 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctrmm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_ctrmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -3255,27 +3286,27 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztrmm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_ztrmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_strsm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_strsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtrsm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_dtrsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -3283,9 +3314,9 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctrsm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_ctrsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -3293,9 +3324,9 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztrsm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_ztrsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, @@ -3303,7 +3334,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t n, float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_strsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_strsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3313,7 +3344,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t n, double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtrsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dtrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3324,7 +3355,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctrsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ctrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3335,7 +3366,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztrsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ztrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3345,7 +3376,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, float **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_strsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_strsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -3355,7 +3386,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, double **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtrsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dtrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -3366,7 +3397,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctrsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ctrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -3377,7 +3408,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztrsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ztrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -3388,7 +3419,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3399,7 +3430,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3410,7 +3441,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex **b, std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3422,7 +3453,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3433,7 +3464,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const sycl::half **b, std::int64_t *ldb, sycl::half *beta, sycl::half **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_hgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_hgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3444,7 +3475,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_f16f16f32_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3455,7 +3486,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_s8s8f32_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_s8s8f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3466,7 +3497,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_s8s8s32_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3477,7 +3508,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3488,7 +3519,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3500,7 +3531,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stride_b, std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3512,7 +3543,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stride_b, std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3524,7 +3555,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_hgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_hgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3535,7 +3566,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_f16f16f32_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3546,7 +3577,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_s8s8f32_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_s8s8f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3557,7 +3588,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_s8s8s32_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3566,18 +3597,18 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low transpose transa, transpose transb, std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgemmt_usm_sycl(queue, upper_lower, transa, transb, - n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + return function_tables[{ libkey, queue }].column_major_sgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_dgemmt_usm_sycl(queue, upper_lower, transa, transb, - n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + return function_tables[{ libkey, queue }].column_major_dgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, @@ -3586,9 +3617,9 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgemmt_usm_sycl(queue, upper_lower, transa, transb, - n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + return function_tables[{ libkey, queue }].column_major_cgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, @@ -3597,9 +3628,9 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgemmt_usm_sycl(queue, upper_lower, transa, transb, - n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + return function_tables[{ libkey, queue }].column_major_zgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, @@ -3608,7 +3639,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_s8u8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_s8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -3619,7 +3650,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_s8s8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -3630,7 +3661,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_u8s8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_u8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -3641,7 +3672,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_u8u8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_u8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -3651,7 +3682,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_somatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_somatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3660,7 +3691,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_domatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_domatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3669,7 +3700,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex *a, std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_comatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_comatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3678,7 +3709,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex *a, std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zomatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zomatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3686,7 +3717,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_simatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_simatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -3694,7 +3725,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t m, std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dimatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -3703,7 +3734,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_cimatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -3712,7 +3743,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zimatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -3722,7 +3753,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_somatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_somatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -3733,7 +3764,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_domatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_domatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -3745,7 +3776,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_comatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_comatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -3757,7 +3788,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zomatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zomatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -3765,39 +3796,39 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_somatcopy_usm_sycl(queue, trans, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_somatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_domatcopy_usm_sycl(queue, trans, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_domatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_comatcopy_usm_sycl(queue, trans, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_comatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_zomatcopy_usm_sycl(queue, trans, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_zomatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].column_major_somatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].column_major_somatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } @@ -3805,7 +3836,7 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].column_major_domatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].column_major_domatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } @@ -3814,7 +3845,7 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex *a, std::int64_t lda, std::int64_t stridea, std::complex *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].column_major_comatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].column_major_comatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } @@ -3823,45 +3854,45 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex *a, std::int64_t lda, std::int64_t stridea, std::complex *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].column_major_zomatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zomatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_simatcopy_usm_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_simatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_dimatcopy_usm_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_dimatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_cimatcopy_usm_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_cimatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_zimatcopy_usm_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_zimatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float beta, const float *b, std::int64_t ldb, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_somatadd_usm_sycl( + return function_tables[{ libkey, queue }].column_major_somatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ -3869,7 +3900,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr transpose transb, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double beta, const double *b, std::int64_t ldb, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_domatadd_usm_sycl( + return function_tables[{ libkey, queue }].column_major_domatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ -3878,7 +3909,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr const std::complex *a, std::int64_t lda, std::complex beta, const std::complex *b, std::int64_t ldb, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_comatadd_usm_sycl( + return function_tables[{ libkey, queue }].column_major_comatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ -3887,7 +3918,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr const std::complex *a, std::int64_t lda, std::complex beta, const std::complex *b, std::int64_t ldb, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zomatadd_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zomatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ -3896,7 +3927,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *lda, float **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_somatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_somatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -3905,7 +3936,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *lda, double **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_domatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_domatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -3914,7 +3945,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex **a, std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_comatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_comatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -3923,7 +3954,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex **a, std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_zomatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zomatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -3931,7 +3962,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *m, std::int64_t *n, float *alpha, float **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_simatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_simatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -3939,7 +3970,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *m, std::int64_t *n, double *alpha, double **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_dimatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -3948,7 +3979,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_cimatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -3957,7 +3988,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_zimatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -3973,413 +4004,418 @@ static oneapi::mkl::detail::table_initializer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_scasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_scasum_sycl(queue, n, x, incx, result); } void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_dzasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_dzasum_sycl(queue, n, x, incx, result); } void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_sasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_sasum_sycl(queue, n, x, incx, result); } void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_dasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_dasum_sycl(queue, n, x, incx, result); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_saxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_saxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_daxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_daxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_caxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_caxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zaxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_zaxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_saxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_saxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_daxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_daxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_caxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_caxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_zaxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_zaxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_saxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].row_major_saxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_daxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].row_major_daxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_caxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].row_major_caxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zaxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].row_major_zaxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_scopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_scopy_sycl(queue, n, x, incx, y, incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_dcopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_dcopy_sycl(queue, n, x, incx, y, incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_ccopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_ccopy_sycl(queue, n, x, incx, y, incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zcopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_zcopy_sycl(queue, n, x, incx, y, incy); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_scopy_batch_strided_sycl(queue, n, x, incx, stridex, y, incy, - stridey, batch_size); + function_tables[{ libkey, queue }].row_major_scopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_dcopy_batch_strided_sycl(queue, n, x, incx, stridex, y, incy, - stridey, batch_size); + function_tables[{ libkey, queue }].row_major_dcopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_ccopy_batch_strided_sycl(queue, n, x, incx, stridex, y, incy, - stridey, batch_size); + function_tables[{ libkey, queue }].row_major_ccopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_zcopy_batch_strided_sycl(queue, n, x, incx, stridex, y, incy, - stridey, batch_size); + function_tables[{ libkey, queue }].row_major_zcopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].row_major_sdot_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_sdot_sycl(queue, n, x, incx, y, incy, result); } void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].row_major_ddot_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_ddot_sycl(queue, n, x, incx, y, incy, result); } void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].row_major_dsdot_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_dsdot_sycl(queue, n, x, incx, y, incy, result); } void dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].row_major_cdotc_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_cdotc_sycl(queue, n, x, incx, y, incy, result); } void dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].row_major_zdotc_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_zdotc_sycl(queue, n, x, incx, y, incy, result); } void dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].row_major_cdotu_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_cdotu_sycl(queue, n, x, incx, y, incy, result); } void dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].row_major_zdotu_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_zdotu_sycl(queue, n, x, incx, y, incy, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_isamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_isamin_sycl(queue, n, x, incx, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_idamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_idamin_sycl(queue, n, x, incx, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_icamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_icamin_sycl(queue, n, x, incx, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_izamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_izamin_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_isamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_isamax_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_idamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_idamax_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_icamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_icamax_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_izamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_izamax_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_scnrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_scnrm2_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_dznrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_dznrm2_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_snrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_snrm2_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_dnrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_dnrm2_sycl(queue, n, x, incx, result); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { - function_tables[libkey].row_major_srot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].row_major_srot_sycl(queue, n, x, incx, y, incy, c, s); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { - function_tables[libkey].row_major_drot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].row_major_drot_sycl(queue, n, x, incx, y, incy, c, s); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { - function_tables[libkey].row_major_csrot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].row_major_csrot_sycl(queue, n, x, incx, y, incy, c, s); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { - function_tables[libkey].row_major_zdrot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].row_major_zdrot_sycl(queue, n, x, incx, y, incy, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { - function_tables[libkey].row_major_srotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].row_major_srotg_sycl(queue, a, b, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { - function_tables[libkey].row_major_drotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].row_major_drotg_sycl(queue, a, b, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, sycl::buffer, 1> &b, sycl::buffer &c, sycl::buffer, 1> &s) { - function_tables[libkey].row_major_crotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].row_major_crotg_sycl(queue, a, b, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, sycl::buffer, 1> &b, sycl::buffer &c, sycl::buffer, 1> &s) { - function_tables[libkey].row_major_zrotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].row_major_zrotg_sycl(queue, a, b, c, s); } void rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m) { - function_tables[libkey].row_major_srotm_sycl(queue, n, x, incx, y, incy, param); + function_tables[{ libkey, queue }].row_major_srotm_sycl(queue, n, x, incx, y, incy, param); } void rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m) { - function_tables[libkey].row_major_drotm_sycl(queue, n, x, incx, y, incy, param); + function_tables[{ libkey, queue }].row_major_drotm_sycl(queue, n, x, incx, y, incy, param); } void rotmg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, sycl::buffer &x1, float y1, sycl::buffer ¶m) { - function_tables[libkey].row_major_srotmg_sycl(queue, d1, d2, x1, y1, param); + function_tables[{ libkey, queue }].row_major_srotmg_sycl(queue, d1, d2, x1, y1, param); } void rotmg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, sycl::buffer &x1, double y1, sycl::buffer ¶m) { - function_tables[libkey].row_major_drotmg_sycl(queue, d1, d2, x1, y1, param); + function_tables[{ libkey, queue }].row_major_drotmg_sycl(queue, d1, d2, x1, y1, param); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_sscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].row_major_sscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_dscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].row_major_dscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_cscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].row_major_cscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_csscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].row_major_csscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_zscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].row_major_zscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_zdscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].row_major_zdscal_sycl(queue, n, alpha, x, incx); } void sdsdot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float sb, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].row_major_sdsdot_sycl(queue, n, sb, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_sdsdot_sycl(queue, n, sb, x, incx, y, incy, + result); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_sswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_sswap_sycl(queue, n, x, incx, y, incy); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_dswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_dswap_sycl(queue, n, x, incx, y, incy); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_cswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_cswap_sycl(queue, n, x, incx, y, incy); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_zswap_sycl(queue, n, x, incx, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_sgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_sgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_dgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_dgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -4387,8 +4423,8 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std:: sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_cgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_cgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -4396,40 +4432,40 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std:: sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_zgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_sgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].row_major_sgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_dgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].row_major_dgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_cgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].row_major_cgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].row_major_zgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -4437,9 +4473,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t stridea, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_sgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_sgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -4447,9 +4483,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t stridea, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, double beta, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_dgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_dgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -4458,9 +4494,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t incx, std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_cgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_cgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -4469,9 +4505,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_zgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_zgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t m, @@ -4479,7 +4515,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].row_major_sdgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_sdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } @@ -4488,7 +4524,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].row_major_ddgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_ddgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } @@ -4497,7 +4533,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].row_major_cdgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_cdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } @@ -4506,426 +4542,444 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].row_major_zdgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_zdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].row_major_sger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].row_major_sger_sycl(queue, m, n, alpha, x, incx, y, incy, a, + lda); } void ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].row_major_dger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].row_major_dger_sycl(queue, m, n, alpha, x, incx, y, incy, a, + lda); } void gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_cgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].row_major_cgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, + lda); } void gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_zgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].row_major_zgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, + lda); } void geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_cgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].row_major_cgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, + lda); } void geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_zgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].row_major_zgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, + lda); } void hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_chbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_chbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, + x, incx, beta, y, incy); } void hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zhbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_zhbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, + x, incx, beta, y, incy); } void hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_chemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_chemv_sycl(queue, upper_lower, n, alpha, a, lda, x, + incx, beta, y, incy); } void hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zhemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_zhemv_sycl(queue, upper_lower, n, alpha, a, lda, x, + incx, beta, y, incy); } void her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_cher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].row_major_cher_sycl(queue, upper_lower, n, alpha, x, incx, a, + lda); } void her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_zher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].row_major_zher_sycl(queue, upper_lower, n, alpha, x, incx, a, + lda); } void her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_cher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, - lda); + function_tables[{ libkey, queue }].row_major_cher2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a, lda); } void her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_zher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, - lda); + function_tables[{ libkey, queue }].row_major_zher2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a, lda); } void hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_chpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, - incy); + function_tables[{ libkey, queue }].row_major_chpmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zhpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, - incy); + function_tables[{ libkey, queue }].row_major_zhpmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a) { - function_tables[libkey].row_major_chpr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].row_major_chpr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a) { - function_tables[libkey].row_major_zhpr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].row_major_zhpr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a) { - function_tables[libkey].row_major_chpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); + function_tables[{ libkey, queue }].row_major_chpr2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a); } void hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a) { - function_tables[libkey].row_major_zhpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); + function_tables[{ libkey, queue }].row_major_zhpr2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a); } void sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_ssbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_ssbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, + x, incx, beta, y, incy); } void sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_dsbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_dsbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, + x, incx, beta, y, incy); } void spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_sspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, - incy); + function_tables[{ libkey, queue }].row_major_sspmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_dspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, - incy); + function_tables[{ libkey, queue }].row_major_dspmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { - function_tables[libkey].row_major_sspr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].row_major_sspr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { - function_tables[libkey].row_major_dspr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].row_major_dspr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { - function_tables[libkey].row_major_sspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); + function_tables[{ libkey, queue }].row_major_sspr2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a); } void spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { - function_tables[libkey].row_major_dspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); + function_tables[{ libkey, queue }].row_major_dspr2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a); } void symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_ssymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_ssymv_sycl(queue, upper_lower, n, alpha, a, lda, x, + incx, beta, y, incy); } void symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_dsymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_dsymv_sycl(queue, upper_lower, n, alpha, a, lda, x, + incx, beta, y, incy); } void syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].row_major_ssyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].row_major_ssyr_sycl(queue, upper_lower, n, alpha, x, incx, a, + lda); } void syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].row_major_dsyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].row_major_dsyr_sycl(queue, upper_lower, n, alpha, x, incx, a, + lda); } void syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].row_major_ssyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, - lda); + function_tables[{ libkey, queue }].row_major_ssyr2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a, lda); } void syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].row_major_dsyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, - lda); + function_tables[{ libkey, queue }].row_major_dsyr2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a, lda); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_stbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_stbmv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_dtbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_dtbmv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ctbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_ctbmv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ztbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_ztbmv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_stbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_stbsv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_dtbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_dtbsv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ctbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_ctbsv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ztbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_ztbsv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_stpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_stpmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_dtpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_dtpmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ctpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_ctpmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ztpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_ztpmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_stpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_stpsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_dtpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_dtpsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ctpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_ctpsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ztpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_ztpsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_strmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_strmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_dtrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_dtrmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ctrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_ctrmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ztrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_ztrmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_strsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_strsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_dtrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_dtrsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ctrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_ctrsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ztrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_ztrsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_sgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_sgemm_sycl(queue, transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_dgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_dgemm_sycl(queue, transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -4933,8 +4987,8 @@ void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, tran sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_cgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_cgemm_sycl(queue, transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -4942,32 +4996,32 @@ void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, tran sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zgemm_sycl(queue, transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_hgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_hgemm_sycl(queue, transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_gemm_f16f16f32_sycl(queue, transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_sycl( + queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_gemm_bf16bf16f32_sycl(queue, transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_gemm_bf16bf16f32_sycl( + queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -4975,8 +5029,8 @@ void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_chemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_chemm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -4984,23 +5038,23 @@ void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zhemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zhemm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, std::int64_t lda, float beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_cherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].row_major_cherk_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1> &a, std::int64_t lda, double beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zherk_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -5008,8 +5062,8 @@ void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, float beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_cher2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_cher2k_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -5017,24 +5071,24 @@ void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, double beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zher2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zher2k_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_ssymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_ssymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_dsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_dsymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -5042,8 +5096,8 @@ void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_csymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_csymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -5051,56 +5105,56 @@ void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zsymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_ssyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].row_major_ssyrk_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_dsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].row_major_dsyrk_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_csyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].row_major_csyrk_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zsyrk_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_ssyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].row_major_ssyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_dsyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].row_major_dsyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -5108,9 +5162,9 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_csyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].row_major_csyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -5118,25 +5172,25 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_zsyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].row_major_zsyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_ssyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_ssyr2k_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_dsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_dsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -5144,8 +5198,8 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_csyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_csyr2k_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -5153,72 +5207,72 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].row_major_strmm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_strmm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].row_major_dtrmm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_dtrmm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].row_major_ctrmm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_ctrmm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].row_major_ztrmm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_ztrmm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].row_major_strsm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_strsm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].row_major_dtrsm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_dtrsm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].row_major_ctrsm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_ctrsm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].row_major_ztrsm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_ztrsm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -5227,7 +5281,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_sgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_sgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5238,7 +5292,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, double beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_dgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_dgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5249,7 +5303,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_cgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_cgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5260,7 +5314,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_zgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_zgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5271,7 +5325,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_hgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_hgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5282,7 +5336,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_gemm_f16f16f32_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5293,7 +5347,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_gemm_s8s8f32_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_gemm_s8s8f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5304,7 +5358,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_gemm_s8s8s32_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5314,7 +5368,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_strsm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_strsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -5324,7 +5378,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_dtrsm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_dtrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -5334,7 +5388,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_ctrsm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_ctrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -5344,7 +5398,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_ztrsm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_ztrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -5353,16 +5407,16 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_sgemmt_sycl(queue, upper_lower, transa, transb, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_sgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_dgemmt_sycl(queue, upper_lower, transa, transb, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_dgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, @@ -5370,8 +5424,8 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_cgemmt_sycl(queue, upper_lower, transa, transb, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_cgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, @@ -5379,8 +5433,8 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zgemmt_sycl(queue, upper_lower, transa, transb, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -5388,7 +5442,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].row_major_gemm_s8u8s32_bias_sycl( + function_tables[{ libkey, queue }].row_major_gemm_s8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -5397,7 +5451,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].row_major_gemm_s8s8s32_bias_sycl( + function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -5406,7 +5460,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].row_major_gemm_u8s8s32_bias_sycl( + function_tables[{ libkey, queue }].row_major_gemm_u8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -5415,7 +5469,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].row_major_gemm_u8u8s32_bias_sycl( + function_tables[{ libkey, queue }].row_major_gemm_u8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -5423,7 +5477,7 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_somatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_somatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -5431,7 +5485,7 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_domatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_domatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -5440,7 +5494,7 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_comatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_comatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -5449,38 +5503,38 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_zomatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_zomatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].row_major_simatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].row_major_simatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].row_major_dimatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].row_major_dimatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].row_major_cimatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].row_major_cimatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].row_major_zimatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].row_major_zimatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, @@ -5489,7 +5543,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_somatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_somatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -5500,7 +5554,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_domatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_domatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -5511,7 +5565,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_comatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_comatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -5523,7 +5577,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_zomatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_zomatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -5531,97 +5585,105 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].row_major_somatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_somatcopy_sycl(queue, trans, m, n, alpha, a, lda, + b, ldb); } void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].row_major_domatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_domatcopy_sycl(queue, trans, m, n, alpha, a, lda, + b, ldb); } void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].row_major_comatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_comatcopy_sycl(queue, trans, m, n, alpha, a, lda, + b, ldb); } void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].row_major_zomatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_zomatcopy_sycl(queue, trans, m, n, alpha, a, lda, + b, ldb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].row_major_somatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, - ldb, strideb); + function_tables[{ libkey, queue }].row_major_somatcopy2_sycl(queue, trans, m, n, alpha, a, lda, + stridea, b, ldb, strideb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].row_major_domatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, - ldb, strideb); + function_tables[{ libkey, queue }].row_major_domatcopy2_sycl(queue, trans, m, n, alpha, a, lda, + stridea, b, ldb, strideb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].row_major_comatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, - ldb, strideb); + function_tables[{ libkey, queue }].row_major_comatcopy2_sycl(queue, trans, m, n, alpha, a, lda, + stridea, b, ldb, strideb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].row_major_zomatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, - ldb, strideb); + function_tables[{ libkey, queue }].row_major_zomatcopy2_sycl(queue, trans, m, n, alpha, a, lda, + stridea, b, ldb, strideb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].row_major_simatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].row_major_simatcopy_sycl(queue, trans, m, n, alpha, ab, lda, + ldb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].row_major_dimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].row_major_dimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, + ldb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].row_major_cimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].row_major_cimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, + ldb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].row_major_zimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].row_major_zimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, + ldb); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_somatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].row_major_somatadd_sycl(queue, transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_domatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].row_major_domatadd_sycl(queue, transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -5629,8 +5691,8 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, t sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_comatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].row_major_comatadd_sycl(queue, transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -5638,8 +5700,8 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, t sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zomatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].row_major_zomatadd_sycl(queue, transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } // USM APIs @@ -5647,64 +5709,64 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, t sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_scasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_scasum_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_dzasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_dzasum_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_sasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_sasum_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_dasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_dasum_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, const float *x, std::int64_t incx, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_saxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_saxpy_usm_sycl(queue, n, alpha, x, incx, y, + incy, dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, const double *x, std::int64_t incx, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_daxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_daxpy_usm_sycl(queue, n, alpha, x, incx, y, + incy, dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_caxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_caxpy_usm_sycl(queue, n, alpha, x, incx, y, + incy, dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zaxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_zaxpy_usm_sycl(queue, n, alpha, x, incx, y, + incy, dependencies); } sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, float *alpha, const float **x, std::int64_t *incx, float **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_saxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_saxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5712,7 +5774,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 double *alpha, const double **x, std::int64_t *incx, double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_daxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_daxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5721,7 +5783,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_caxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_caxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5730,7 +5792,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zaxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zaxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5738,7 +5800,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const float *x, std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_saxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_saxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -5746,7 +5808,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const double *x, std::int64_t incx, std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_daxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_daxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -5755,7 +5817,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_caxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_caxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -5764,73 +5826,73 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zaxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zaxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_saxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].row_major_saxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_daxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].row_major_daxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_caxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].row_major_caxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zaxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].row_major_zaxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_scopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_scopy_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_dcopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_dcopy_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_ccopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_ccopy_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zcopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_zcopy_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, const float **x, std::int64_t *incx, float **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_scopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_scopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5838,7 +5900,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const double **x, std::int64_t *incx, double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5846,7 +5908,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex **x, std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ccopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ccopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5854,7 +5916,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex **x, std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5862,7 +5924,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const float *x, std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_scopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_scopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -5870,7 +5932,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const double *x, std::int64_t incx, std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -5878,7 +5940,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex *x, std::int64_t incx, std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ccopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ccopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -5886,299 +5948,307 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex *x, std::int64_t incx, std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_sdot_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_sdot_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_ddot_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_ddot_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, const float *y, std::int64_t incy, double *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsdot_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_dsdot_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_cdotc_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_cdotc_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_zdotc_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_zdotc_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_cdotu_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_cdotu_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_zdotu_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_zdotu_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_isamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_isamin_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_idamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_idamin_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_icamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_icamin_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_izamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_izamin_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_isamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_isamax_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_idamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_idamax_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_icamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_icamax_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_izamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_izamax_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_scnrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_scnrm2_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_dznrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_dznrm2_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_snrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_snrm2_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_dnrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_dnrm2_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, float c, float s, const std::vector &dependencies) { - return function_tables[libkey].row_major_srot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue }].row_major_srot_usm_sycl(queue, n, x, incx, y, incy, c, + s, dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, double c, double s, const std::vector &dependencies) { - return function_tables[libkey].row_major_drot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue }].row_major_drot_usm_sycl(queue, n, x, incx, y, incy, c, + s, dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, std::int64_t incy, float c, float s, const std::vector &dependencies) { - return function_tables[libkey].row_major_csrot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue }].row_major_csrot_usm_sycl(queue, n, x, incx, y, incy, + c, s, dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, std::int64_t incy, double c, double s, const std::vector &dependencies) { - return function_tables[libkey].row_major_zdrot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue }].row_major_zdrot_usm_sycl(queue, n, x, incx, y, incy, + c, s, dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, float *a, float *b, float *c, float *s, const std::vector &dependencies) { - return function_tables[libkey].row_major_srotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].row_major_srotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, double *a, double *b, double *c, double *s, const std::vector &dependencies) { - return function_tables[libkey].row_major_drotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].row_major_drotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, std::complex *a, std::complex *b, float *c, std::complex *s, const std::vector &dependencies) { - return function_tables[libkey].row_major_crotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].row_major_crotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, std::complex *a, std::complex *b, double *c, std::complex *s, const std::vector &dependencies) { - return function_tables[libkey].row_major_zrotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].row_major_zrotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, std::int64_t incy, float *param, const std::vector &dependencies) { - return function_tables[libkey].row_major_srotm_usm_sycl(queue, n, x, incx, y, incy, param, - dependencies); + return function_tables[{ libkey, queue }].row_major_srotm_usm_sycl(queue, n, x, incx, y, incy, + param, dependencies); } sycl::event rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, std::int64_t incy, double *param, const std::vector &dependencies) { - return function_tables[libkey].row_major_drotm_usm_sycl(queue, n, x, incx, y, incy, param, - dependencies); + return function_tables[{ libkey, queue }].row_major_drotm_usm_sycl(queue, n, x, incx, y, incy, + param, dependencies); } sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue &queue, float *d1, float *d2, float *x1, float y1, float *param, const std::vector &dependencies) { - return function_tables[libkey].row_major_srotmg_usm_sycl(queue, d1, d2, x1, y1, param, - dependencies); + return function_tables[{ libkey, queue }].row_major_srotmg_usm_sycl(queue, d1, d2, x1, y1, + param, dependencies); } sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue &queue, double *d1, double *d2, double *x1, double y1, double *param, const std::vector &dependencies) { - return function_tables[libkey].row_major_drotmg_usm_sycl(queue, d1, d2, x1, y1, param, - dependencies); + return function_tables[{ libkey, queue }].row_major_drotmg_usm_sycl(queue, d1, d2, x1, y1, + param, dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_sscal_usm_sycl(queue, n, alpha, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_sscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_dscal_usm_sycl(queue, n, alpha, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_dscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_cscal_usm_sycl(queue, n, alpha, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_cscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_csscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].row_major_csscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_zscal_usm_sycl(queue, n, alpha, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_zscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_zdscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].row_major_zdscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event sdsdot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float sb, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_sdsdot_usm_sycl(queue, n, sb, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_sdsdot_usm_sycl(queue, n, sb, x, incx, y, + incy, result, dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_sswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_sswap_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_dswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_dswap_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_cswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_cswap_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_zswap_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgbmv_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -6186,7 +6256,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgbmv_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -6195,7 +6265,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgbmv_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -6204,7 +6274,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgbmv_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -6212,16 +6282,16 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_sgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_dgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -6229,8 +6299,8 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_cgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -6238,8 +6308,8 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_zgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, @@ -6248,7 +6318,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stridex, float beta, float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -6259,7 +6329,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stridex, double beta, double *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -6271,7 +6341,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -6283,7 +6353,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -6293,7 +6363,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, float **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -6303,7 +6373,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -6314,7 +6384,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex **x, std::int64_t *incx, std::complex *beta, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -6326,7 +6396,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex *beta, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -6336,7 +6406,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t stridea, const float *x, std::int64_t incx, std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_sdgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -6346,7 +6416,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t stridea, const double *x, std::int64_t incx, std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ddgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ddgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -6357,7 +6427,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t incx, std::int64_t stridex, std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_cdgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -6368,7 +6438,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t incx, std::int64_t stridex, std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zdgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -6378,7 +6448,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_sdgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } @@ -6387,7 +6457,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ddgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ddgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } @@ -6396,7 +6466,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, const std::complex **x, std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_cdgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } @@ -6405,55 +6475,55 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, const std::complex **x, std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zdgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } sycl::event ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_sger_usm_sycl(queue, m, n, alpha, x, incx, y, incy, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_sger_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_dger_usm_sycl(queue, m, n, alpha, x, incx, y, incy, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_dger_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgerc_usm_sycl(queue, m, n, alpha, x, incx, y, incy, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_cgerc_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgerc_usm_sycl(queue, m, n, alpha, x, incx, y, incy, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_zgerc_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgeru_usm_sycl(queue, m, n, alpha, x, incx, y, incy, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_cgeru_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgeru_usm_sycl(queue, m, n, alpha, x, incx, y, incy, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_zgeru_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -6461,8 +6531,8 @@ sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_chbmv_usm_sycl(queue, upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_chbmv_usm_sycl( + queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -6470,8 +6540,8 @@ sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zhbmv_usm_sycl(queue, upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_zhbmv_usm_sycl( + queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -6479,8 +6549,8 @@ sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_chemv_usm_sycl(queue, upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_chemv_usm_sycl( + queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -6488,40 +6558,40 @@ sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zhemv_usm_sycl(queue, upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_zhemv_usm_sycl( + queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const std::complex *x, std::int64_t incx, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_cher_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_cher_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const std::complex *x, std::int64_t incx, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_zher_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_zher_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_cher2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].row_major_cher2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_zher2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].row_major_zher2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -6529,8 +6599,8 @@ sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_chpmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_chpmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -6538,328 +6608,328 @@ sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zhpmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_zhpmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const std::complex *x, std::int64_t incx, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].row_major_chpr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - dependencies); + return function_tables[{ libkey, queue }].row_major_chpr_usm_sycl(queue, upper_lower, n, alpha, + x, incx, a, dependencies); } sycl::event hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const std::complex *x, std::int64_t incx, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].row_major_zhpr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - dependencies); + return function_tables[{ libkey, queue }].row_major_zhpr_usm_sycl(queue, upper_lower, n, alpha, + x, incx, a, dependencies); } sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].row_major_chpr2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, dependencies); + return function_tables[{ libkey, queue }].row_major_chpr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].row_major_zhpr2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, dependencies); + return function_tables[{ libkey, queue }].row_major_zhpr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssbmv_usm_sycl(queue, upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_ssbmv_usm_sycl( + queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsbmv_usm_sycl(queue, upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_dsbmv_usm_sycl( + queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *a, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_sspmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_sspmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *a, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_dspmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_dspmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, float *a, const std::vector &dependencies) { - return function_tables[libkey].row_major_sspr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - dependencies); + return function_tables[{ libkey, queue }].row_major_sspr_usm_sycl(queue, upper_lower, n, alpha, + x, incx, a, dependencies); } sycl::event spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, double *a, const std::vector &dependencies) { - return function_tables[libkey].row_major_dspr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - dependencies); + return function_tables[{ libkey, queue }].row_major_dspr_usm_sycl(queue, upper_lower, n, alpha, + x, incx, a, dependencies); } sycl::event spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, const std::vector &dependencies) { - return function_tables[libkey].row_major_sspr2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, dependencies); + return function_tables[{ libkey, queue }].row_major_sspr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } sycl::event spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, const std::vector &dependencies) { - return function_tables[libkey].row_major_dspr2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, dependencies); + return function_tables[{ libkey, queue }].row_major_dspr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } sycl::event symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssymv_usm_sycl(queue, upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_ssymv_usm_sycl( + queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsymv_usm_sycl(queue, upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_dsymv_usm_sycl( + queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssyr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_ssyr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsyr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_dsyr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssyr2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].row_major_ssyr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsyr2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].row_major_dsyr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_stbmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_stbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtbmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_dtbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctbmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ctbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztbmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ztbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_stbsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_stbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtbsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_dtbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctbsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ctbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztbsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ztbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_stpmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_stpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtpmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_dtpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctpmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ctpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztpmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ztpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_stpsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_stpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtpsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_dtpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctpsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ctpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztpsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ztpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_strmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_strmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtrmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_dtrmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctrmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ctrmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztrmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ztrmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_strsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_strsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtrsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_dtrsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctrsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ctrsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztrsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ztrsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgemm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6867,7 +6937,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgemm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6876,7 +6946,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgemm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6885,7 +6955,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgemm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6894,7 +6964,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, sycl::half beta, sycl::half *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_hgemm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_hgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6902,7 +6972,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_f16f16f32_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6910,7 +6980,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_bf16bf16f32_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_bf16bf16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6919,7 +6989,7 @@ sycl::event hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_chemm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_chemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6928,7 +6998,7 @@ sycl::event hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zhemm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zhemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6936,16 +7006,16 @@ sycl::event herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t n, std::int64_t k, float alpha, const std::complex *a, std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_cherk_usm_sycl(queue, upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc, dependencies); + return function_tables[{ libkey, queue }].row_major_cherk_usm_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } sycl::event herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, const std::complex *a, std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zherk_usm_sycl(queue, upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc, dependencies); + return function_tables[{ libkey, queue }].row_major_zherk_usm_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -6953,7 +7023,7 @@ sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_cher2k_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6962,7 +7032,7 @@ sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, double beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zher2k_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6970,7 +7040,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssymm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ssymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6978,7 +7048,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsymm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6987,7 +7057,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_csymm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_csymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6996,7 +7066,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zsymm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -7004,16 +7074,16 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssyrk_usm_sycl(queue, upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc, dependencies); + return function_tables[{ libkey, queue }].row_major_ssyrk_usm_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsyrk_usm_sycl(queue, upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc, dependencies); + return function_tables[{ libkey, queue }].row_major_dsyrk_usm_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -7021,8 +7091,8 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *a, std::int64_t lda, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_csyrk_usm_sycl(queue, upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc, dependencies); + return function_tables[{ libkey, queue }].row_major_csyrk_usm_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -7030,8 +7100,8 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *a, std::int64_t lda, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zsyrk_usm_sycl(queue, upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc, dependencies); + return function_tables[{ libkey, queue }].row_major_zsyrk_usm_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upper_lower, @@ -7039,7 +7109,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp const float **a, std::int64_t *lda, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ssyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -7049,7 +7119,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp const double **a, std::int64_t *lda, double *beta, double **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -7060,7 +7130,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_csyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_csyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -7071,7 +7141,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp std::int64_t *lda, std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zsyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -7081,7 +7151,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe std::int64_t lda, std::int64_t stride_a, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ssyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7091,7 +7161,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe const double *a, std::int64_t lda, std::int64_t stride_a, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7102,7 +7172,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_csyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_csyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7113,7 +7183,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zsyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7122,7 +7192,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssyr2k_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ssyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -7130,7 +7200,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsyr2k_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -7139,7 +7209,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_csyr2k_usm_sycl( + return function_tables[{ libkey, queue }].row_major_csyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -7148,7 +7218,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zsyr2k_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -7156,18 +7226,18 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_strmm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_strmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtrmm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_dtrmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -7175,9 +7245,9 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctrmm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_ctrmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -7185,27 +7255,27 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztrmm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_ztrmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_strsm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_strsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtrsm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_dtrsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -7213,9 +7283,9 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctrsm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_ctrsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -7223,9 +7293,9 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztrsm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_ztrsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, @@ -7233,7 +7303,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t n, float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_strsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_strsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7243,7 +7313,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t n, double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtrsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dtrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7254,7 +7324,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctrsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ctrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7265,7 +7335,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztrsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ztrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7275,7 +7345,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, float **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_strsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_strsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -7285,7 +7355,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, double **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtrsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dtrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -7296,7 +7366,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctrsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ctrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -7307,7 +7377,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztrsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ztrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -7318,7 +7388,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7329,7 +7399,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7340,7 +7410,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex **b, std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7352,7 +7422,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7363,7 +7433,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const sycl::half **b, std::int64_t *ldb, sycl::half *beta, sycl::half **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_hgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_hgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7374,7 +7444,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_f16f16f32_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7385,7 +7455,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_s8s8f32_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_s8s8f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7396,7 +7466,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_s8s8s32_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7407,7 +7477,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7418,7 +7488,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7430,7 +7500,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stride_b, std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7442,7 +7512,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stride_b, std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7454,7 +7524,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_hgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_hgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7465,7 +7535,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_f16f16f32_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7476,7 +7546,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_s8s8f32_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_s8s8f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7487,7 +7557,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_s8s8s32_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7496,18 +7566,18 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low transpose transa, transpose transb, std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgemmt_usm_sycl(queue, upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + return function_tables[{ libkey, queue }].row_major_sgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgemmt_usm_sycl(queue, upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + return function_tables[{ libkey, queue }].row_major_dgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, @@ -7516,9 +7586,9 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgemmt_usm_sycl(queue, upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + return function_tables[{ libkey, queue }].row_major_cgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, @@ -7527,9 +7597,9 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgemmt_usm_sycl(queue, upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + return function_tables[{ libkey, queue }].row_major_zgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, @@ -7538,7 +7608,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_s8u8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_s8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -7549,7 +7619,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_s8s8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -7560,7 +7630,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_u8s8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_u8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -7571,7 +7641,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_u8u8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_u8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -7581,7 +7651,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_somatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_somatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7590,7 +7660,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_domatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_domatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7599,7 +7669,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex *a, std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_comatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_comatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7608,7 +7678,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex *a, std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zomatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zomatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7616,7 +7686,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_simatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_simatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -7624,7 +7694,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t m, std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dimatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -7633,7 +7703,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_cimatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -7642,7 +7712,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zimatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -7652,7 +7722,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_somatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_somatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -7663,7 +7733,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_domatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_domatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -7675,7 +7745,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_comatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_comatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -7687,7 +7757,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zomatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zomatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -7695,39 +7765,39 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_somatcopy_usm_sycl(queue, trans, m, n, alpha, a, lda, - b, ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_somatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_domatcopy_usm_sycl(queue, trans, m, n, alpha, a, lda, - b, ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_domatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_comatcopy_usm_sycl(queue, trans, m, n, alpha, a, lda, - b, ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_comatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_zomatcopy_usm_sycl(queue, trans, m, n, alpha, a, lda, - b, ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_zomatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].row_major_somatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].row_major_somatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } @@ -7735,7 +7805,7 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].row_major_domatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].row_major_domatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } @@ -7744,7 +7814,7 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex *a, std::int64_t lda, std::int64_t stridea, std::complex *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].row_major_comatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].row_major_comatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } @@ -7753,45 +7823,45 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex *a, std::int64_t lda, std::int64_t stridea, std::complex *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].row_major_zomatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zomatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_simatcopy_usm_sycl(queue, trans, m, n, alpha, ab, lda, - ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_simatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_dimatcopy_usm_sycl(queue, trans, m, n, alpha, ab, lda, - ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_dimatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_cimatcopy_usm_sycl(queue, trans, m, n, alpha, ab, lda, - ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_cimatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_zimatcopy_usm_sycl(queue, trans, m, n, alpha, ab, lda, - ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_zimatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float beta, const float *b, std::int64_t ldb, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_somatadd_usm_sycl( + return function_tables[{ libkey, queue }].row_major_somatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ -7799,7 +7869,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr transpose transb, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double beta, const double *b, std::int64_t ldb, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_domatadd_usm_sycl( + return function_tables[{ libkey, queue }].row_major_domatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ -7808,7 +7878,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr const std::complex *a, std::int64_t lda, std::complex beta, const std::complex *b, std::int64_t ldb, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_comatadd_usm_sycl( + return function_tables[{ libkey, queue }].row_major_comatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ -7817,7 +7887,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr const std::complex *a, std::int64_t lda, std::complex beta, const std::complex *b, std::int64_t ldb, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zomatadd_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zomatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ -7826,7 +7896,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *lda, float **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_somatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_somatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -7835,7 +7905,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *lda, double **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_domatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_domatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -7844,7 +7914,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex **a, std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_comatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_comatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -7853,7 +7923,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex **a, std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_zomatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zomatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -7861,7 +7931,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *m, std::int64_t *n, float *alpha, float **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_simatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_simatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -7869,7 +7939,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *m, std::int64_t *n, double *alpha, double **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_dimatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -7878,7 +7948,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_cimatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -7887,7 +7957,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_zimatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } diff --git a/src/dft/dft_loader.cpp b/src/dft/dft_loader.cpp index b0c421fb0..55a280388 100644 --- a/src/dft/dft_loader.cpp +++ b/src/dft/dft_loader.cpp @@ -34,28 +34,28 @@ template <> commit_impl* create_commit( const descriptor& desc, sycl::queue& sycl_queue) { auto libkey = get_device_id(sycl_queue); - return function_tables[libkey].create_commit_sycl_fz(desc, sycl_queue); + return function_tables[{ libkey, sycl_queue }].create_commit_sycl_fz(desc, sycl_queue); } template <> commit_impl* create_commit( const descriptor& desc, sycl::queue& sycl_queue) { auto libkey = get_device_id(sycl_queue); - return function_tables[libkey].create_commit_sycl_dz(desc, sycl_queue); + return function_tables[{ libkey, sycl_queue }].create_commit_sycl_dz(desc, sycl_queue); } template <> commit_impl* create_commit( const descriptor& desc, sycl::queue& sycl_queue) { auto libkey = get_device_id(sycl_queue); - return function_tables[libkey].create_commit_sycl_fr(desc, sycl_queue); + return function_tables[{ libkey, sycl_queue }].create_commit_sycl_fr(desc, sycl_queue); } template <> commit_impl* create_commit( const descriptor& desc, sycl::queue& sycl_queue) { auto libkey = get_device_id(sycl_queue); - return function_tables[libkey].create_commit_sycl_dr(desc, sycl_queue); + return function_tables[{ libkey, sycl_queue }].create_commit_sycl_dr(desc, sycl_queue); } template diff --git a/src/include/function_table_initializer.hpp b/src/include/function_table_initializer.hpp index 24b2ffb86..881a5205f 100644 --- a/src/include/function_table_initializer.hpp +++ b/src/include/function_table_initializer.hpp @@ -59,14 +59,20 @@ class table_initializer { using dlhandle = std::unique_ptr; public: - function_table_t &operator[](oneapi::mkl::device key) { - auto lib = tables.find(key); + function_table_t &operator[](std::pair device_queue_pair) { + auto lib = tables.find(device_queue_pair.first); if (lib != tables.end()) return lib->second; - return add_table(key); + return add_table(device_queue_pair.first, device_queue_pair.second); } private: +#ifdef ENABLE_PORTBLAS_BACKEND + static constexpr bool is_generic_device_supported = true; +#else + static constexpr bool is_generic_device_supported = false; +#endif + #ifdef _WIN64 // Create a string with last error message std::string GetLastErrorStdStr() { @@ -90,7 +96,7 @@ class table_initializer { } #endif - function_table_t &add_table(oneapi::mkl::device key) { + function_table_t &add_table(oneapi::mkl::device key, sycl::queue &q) { dlhandle handle; // check all available libraries for the key(device) for (const char *libname : libraries[domain_id][key]) { @@ -99,8 +105,13 @@ class table_initializer { break; } if (!handle) { - std::cerr << ERROR_MSG << '\n'; - throw mkl::backend_not_found(); + if (!is_generic_device_supported && key == oneapi::mkl::device::generic_device) { + throw mkl::unsupported_device("", "", q.get_device()); + } + else { + std::cerr << ERROR_MSG << '\n'; + throw mkl::backend_not_found(); + } } auto t = reinterpret_cast(::GET_FUNC(handle.get(), table_names[domain_id])); diff --git a/src/lapack/lapack_loader.cpp b/src/lapack/lapack_loader.cpp index 43fe349d1..f26e5f5ad 100644 --- a/src/lapack/lapack_loader.cpp +++ b/src/lapack/lapack_loader.cpp @@ -35,162 +35,178 @@ void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std:: sycl::buffer &e, sycl::buffer> &tauq, sycl::buffer> &taup, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].cgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size); } void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, sycl::buffer &tauq, sycl::buffer &taup, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size); } void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, sycl::buffer &tauq, sycl::buffer &taup, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].sgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size); } void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, sycl::buffer> &tauq, sycl::buffer> &taup, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size); } void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgetri_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgetri_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgetri_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgetri_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &b, std::int64_t ldb, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].cgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, + scratchpad, scratchpad_size); } void getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, + scratchpad, scratchpad_size); } void getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].sgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, + scratchpad, scratchpad_size); } void getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &b, std::int64_t ldb, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, + scratchpad, scratchpad_size); } void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, + ldvt, scratchpad, scratchpad_size); } void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, + ldvt, scratchpad, scratchpad_size); } void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, @@ -198,8 +214,8 @@ void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd j sycl::buffer> &u, std::int64_t ldu, sycl::buffer> &vt, std::int64_t ldvt, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, + ldvt, scratchpad, scratchpad_size); } void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, @@ -207,341 +223,363 @@ void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd j sycl::buffer> &u, std::int64_t ldu, sycl::buffer> &vt, std::int64_t ldvt, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, + ldvt, scratchpad, scratchpad_size); } void heevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &w, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cheevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].cheevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, + scratchpad_size); } void heevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &w, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zheevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zheevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, + scratchpad_size); } void hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].chegvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].chegvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, + scratchpad, scratchpad_size); } void hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zhegvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zhegvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, + scratchpad, scratchpad_size); } void hetrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].chetrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].chetrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, + scratchpad_size); } void hetrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zhetrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zhetrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, + scratchpad_size); } void hetrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].chetrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].chetrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void hetrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zhetrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zhetrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void orgbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sorgbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].sorgbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void orgbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dorgbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dorgbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dorgqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dorgqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sorgqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sorgqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void orgtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sorgtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sorgtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, + scratchpad_size); } void orgtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dorgtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dorgtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, + scratchpad_size); } void ormtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sormtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sormtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); } void ormtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dormtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dormtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); } void ormrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sormrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sormrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void ormrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dormrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dormrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void ormqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dormqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dormqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void ormqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sormqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sormqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].spotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].spotrf_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dpotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dpotrf_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cpotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cpotrf_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zpotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zpotrf_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].spotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].spotri_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dpotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dpotri_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cpotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cpotri_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zpotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zpotri_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].spotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].spotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size); } void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size); } void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].cpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size); } void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size); } void syevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &w, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dsyevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dsyevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, + scratchpad_size); } void syevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &w, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].ssyevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].ssyevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, + scratchpad_size); } void sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dsygvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dsygvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, + scratchpad, scratchpad_size); } void sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].ssygvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].ssygvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, + scratchpad, scratchpad_size); } void sytrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dsytrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dsytrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, + scratchpad_size); } void sytrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].ssytrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].ssytrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, + scratchpad_size); } void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].ssytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].ssytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dsytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dsytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].csytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].csytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zsytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zsytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].ctrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].ctrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size); } void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dtrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dtrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size); } void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].strtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].strtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size); } void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].ztrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].ztrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size); } void ungbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cungbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].cungbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void ungbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zungbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zungbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cungqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cungqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zungqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zungqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void ungtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cungtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cungtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, + scratchpad_size); } void ungtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zungtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zungtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, + scratchpad_size); } void unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, @@ -549,8 +587,8 @@ void unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side sid sycl::buffer> &tau, sycl::buffer> &c, std::int64_t ldc, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cunmrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cunmrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, @@ -558,8 +596,8 @@ void unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side sid sycl::buffer> &tau, sycl::buffer> &c, std::int64_t ldc, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zunmrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zunmrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, @@ -567,8 +605,8 @@ void unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side sid sycl::buffer> &tau, sycl::buffer> &c, std::int64_t ldc, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cunmqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cunmqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, @@ -576,8 +614,8 @@ void unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side sid sycl::buffer> &tau, sycl::buffer> &c, std::int64_t ldc, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zunmqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zunmqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, @@ -585,8 +623,8 @@ void unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side sid sycl::buffer> &tau, sycl::buffer> &c, std::int64_t ldc, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cunmtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cunmtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); } void unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, @@ -594,196 +632,199 @@ void unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side sid sycl::buffer> &tau, sycl::buffer> &c, std::int64_t ldc, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zunmtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zunmtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); } sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, float *d, float *e, std::complex *tauq, std::complex *taup, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgebrd_usm_sycl(queue, m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgebrd_usm_sycl( + queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, double *d, double *e, double *tauq, double *taup, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgebrd_usm_sycl(queue, m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgebrd_usm_sycl( + queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, float *d, float *e, float *tauq, float *taup, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgebrd_usm_sycl(queue, m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgebrd_usm_sycl( + queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, double *d, double *e, std::complex *tauq, std::complex *taup, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgebrd_usm_sycl(queue, m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgebrd_usm_sycl( + queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); } sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); } sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); } sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); } sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); } sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); } sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); } sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); } sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *b, std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetrs_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgetrs_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t *ipiv, double *b, std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetrs_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgetrs_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t *ipiv, float *b, std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetrs_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgetrs_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *b, std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetrs_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgetrs_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, double *s, double *u, std::int64_t ldu, double *vt, std::int64_t ldvt, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, - ldvt, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, + scratchpad_size, dependencies); } sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, float *s, float *u, std::int64_t ldu, float *vt, std::int64_t ldvt, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, - ldvt, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, + scratchpad_size, dependencies); } sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, float *s, std::complex *u, std::int64_t ldu, std::complex *vt, std::int64_t ldvt, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, - ldvt, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, + scratchpad_size, dependencies); } sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, @@ -791,407 +832,422 @@ sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::j std::int64_t ldu, std::complex *vt, std::int64_t ldvt, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, - ldvt, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, + scratchpad_size, dependencies); } sycl::event heevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, float *w, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cheevd_usm_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cheevd_usm_sycl( + queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } sycl::event heevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, double *w, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zheevd_usm_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zheevd_usm_sycl( + queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, float *w, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].chegvd_usm_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].chegvd_usm_sycl( + queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, double *w, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zhegvd_usm_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zhegvd_usm_sycl( + queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, float *d, float *e, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].chetrd_usm_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].chetrd_usm_sycl( + queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, double *d, double *e, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zhetrd_usm_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zhetrd_usm_sycl( + queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].chetrf_usm_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].chetrf_usm_sycl( + queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zhetrf_usm_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zhetrf_usm_sycl( + queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sorgbr_usm_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sorgbr_usm_sycl( + queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dorgbr_usm_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dorgbr_usm_sycl( + queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dorgqr_usm_sycl(queue, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dorgqr_usm_sycl( + queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sorgqr_usm_sycl(queue, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sorgqr_usm_sycl( + queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sorgtr_usm_sycl(queue, uplo, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sorgtr_usm_sycl( + queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dorgtr_usm_sycl(queue, uplo, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dorgtr_usm_sycl( + queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sormtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, - ldc, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sormtr_usm_sycl(queue, side, uplo, trans, m, n, a, + lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, double *tau, double *c, std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dormtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, - ldc, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dormtr_usm_sycl(queue, side, uplo, trans, m, n, a, + lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sormrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sormrq_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dormrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dormrq_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dormqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dormqr_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sormqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sormqr_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].spotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].spotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].spotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].spotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].spotrs_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].spotrs_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dpotrs_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dpotrs_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cpotrs_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cpotrs_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zpotrs_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zpotrs_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event syevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *w, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dsyevd_usm_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dsyevd_usm_sycl( + queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } sycl::event syevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *w, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].ssyevd_usm_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].ssyevd_usm_sycl( + queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *b, std::int64_t ldb, double *w, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dsygvd_usm_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dsygvd_usm_sycl( + queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *b, std::int64_t ldb, float *w, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].ssygvd_usm_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].ssygvd_usm_sycl( + queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *d, double *e, double *tau, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dsytrd_usm_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dsytrd_usm_sycl( + queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *d, float *e, float *tau, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].ssytrd_usm_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].ssytrd_usm_sycl( + queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].ssytrf_usm_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].ssytrf_usm_sycl( + queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dsytrf_usm_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dsytrf_usm_sycl( + queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].csytrf_usm_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].csytrf_usm_sycl( + queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zsytrf_usm_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zsytrf_usm_sycl( + queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].ctrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, - ldb, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].ctrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, + lda, b, ldb, scratchpad, + scratchpad_size, dependencies); } sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dtrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, - ldb, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dtrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, + lda, b, ldb, scratchpad, + scratchpad_size, dependencies); } sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].strtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, - ldb, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].strtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, + lda, b, ldb, scratchpad, + scratchpad_size, dependencies); } sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].ztrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, - ldb, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].ztrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, + lda, b, ldb, scratchpad, + scratchpad_size, dependencies); } sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cungbr_usm_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cungbr_usm_sycl( + queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zungbr_usm_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zungbr_usm_sycl( + queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cungqr_usm_sycl(queue, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cungqr_usm_sycl( + queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zungqr_usm_sycl(queue, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zungqr_usm_sycl( + queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cungtr_usm_sycl(queue, uplo, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cungtr_usm_sycl( + queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zungtr_usm_sycl(queue, uplo, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zungtr_usm_sycl( + queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *c, std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cunmrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cunmrq_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *c, std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zunmrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zunmrq_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *c, std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cunmqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cunmqr_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *c, std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zunmqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zunmqr_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, @@ -1199,8 +1255,9 @@ sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::s std::complex *tau, std::complex *c, std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cunmtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, - ldc, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cunmtr_usm_sycl(queue, side, uplo, trans, m, n, a, + lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, @@ -1208,70 +1265,71 @@ sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::s std::complex *tau, std::complex *c, std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zunmtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, - ldc, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zunmtr_usm_sycl(queue, side, uplo, trans, m, n, a, + lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgeqrf_batch_sycl(queue, m, n, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgeqrf_batch_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgeqrf_batch_sycl(queue, m, n, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgeqrf_batch_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer> &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgeqrf_batch_sycl(queue, m, n, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgeqrf_batch_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer> &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgeqrf_batch_sycl(queue, m, n, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgeqrf_batch_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgetri_batch_sycl(queue, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgetri_batch_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgetri_batch_sycl(queue, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgetri_batch_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgetri_batch_sycl(queue, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgetri_batch_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgetri_batch_sycl(queue, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgetri_batch_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, @@ -1279,9 +1337,9 @@ void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::tr sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, - stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].sgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, + ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, @@ -1289,9 +1347,9 @@ void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::tr sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, - stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, + ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, @@ -1299,9 +1357,9 @@ void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::tr std::int64_t stride_ipiv, sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, - stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].cgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, + ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, @@ -1309,149 +1367,153 @@ void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::tr std::int64_t stride_ipiv, sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, - stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, + ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgetrf_batch_sycl(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgetrf_batch_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgetrf_batch_sycl(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgetrf_batch_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgetrf_batch_sycl(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgetrf_batch_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgetrf_batch_sycl(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgetrf_batch_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sorgqr_batch_sycl(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sorgqr_batch_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } void orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dorgqr_batch_sycl(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dorgqr_batch_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].spotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].spotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size); } void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size); } void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size); } void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size); } void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].spotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, - stride_b, batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].spotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, + ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, - stride_b, batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, + ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, - stride_b, batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, + ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, - stride_b, batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, + ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } void ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer> &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cungqr_batch_sycl(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cungqr_batch_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } void ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer> &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zungqr_batch_sycl(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zungqr_batch_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, std::int64_t stride_a, float *tau, std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgeqrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgeqrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, std::int64_t stride_a, double *tau, std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgeqrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgeqrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, @@ -1459,9 +1521,9 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgeqrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgeqrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, @@ -1469,27 +1531,27 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgeqrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgeqrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgetrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgetrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, @@ -1497,9 +1559,9 @@ sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgetrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, @@ -1507,45 +1569,45 @@ sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgetrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetri_batch_usm_sycl(queue, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgetri_batch_usm_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetri_batch_usm_sycl(queue, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgetri_batch_usm_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetri_batch_usm_sycl(queue, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgetri_batch_usm_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetri_batch_usm_sycl(queue, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgetri_batch_usm_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, float *a, @@ -1553,7 +1615,7 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t stride_ipiv, float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetrs_batch_usm_sycl( + return function_tables[{ libkey, queue }].sgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } @@ -1564,7 +1626,7 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t stride_b, std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetrs_batch_usm_sycl( + return function_tables[{ libkey, queue }].dgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } @@ -1575,7 +1637,7 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetrs_batch_usm_sycl( + return function_tables[{ libkey, queue }].cgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } @@ -1586,7 +1648,7 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetrs_batch_usm_sycl( + return function_tables[{ libkey, queue }].zgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } @@ -1595,31 +1657,31 @@ sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t stride_a, float *tau, std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sorgqr_batch_usm_sycl(queue, m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sorgqr_batch_usm_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, std::int64_t stride_a, double *tau, std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dorgqr_batch_usm_sycl(queue, m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dorgqr_batch_usm_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].spotrf_batch_usm_sycl( + return function_tables[{ libkey, queue }].spotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dpotrf_batch_usm_sycl( + return function_tables[{ libkey, queue }].dpotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, @@ -1627,7 +1689,7 @@ sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi:: std::int64_t stride_a, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cpotrf_batch_usm_sycl( + return function_tables[{ libkey, queue }].cpotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, @@ -1635,7 +1697,7 @@ sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi:: std::int64_t stride_a, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zpotrf_batch_usm_sycl( + return function_tables[{ libkey, queue }].zpotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, @@ -1643,18 +1705,18 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi:: std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].spotrs_batch_usm_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, - ldb, stride_b, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].spotrs_batch_usm_sycl( + queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size, dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dpotrs_batch_usm_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, - ldb, stride_b, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dpotrs_batch_usm_sycl( + queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size, dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, @@ -1662,9 +1724,9 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi:: std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cpotrs_batch_usm_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, - ldb, stride_b, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cpotrs_batch_usm_sycl( + queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size, dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::complex *a, @@ -1672,9 +1734,9 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi:: std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zpotrs_batch_usm_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, - ldb, stride_b, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zpotrs_batch_usm_sycl( + queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size, dependencies); } sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, @@ -1682,9 +1744,9 @@ sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cungqr_batch_usm_sycl(queue, m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cungqr_batch_usm_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, @@ -1692,27 +1754,27 @@ sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zungqr_batch_usm_sycl(queue, m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zungqr_batch_usm_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, std::int64_t *lda, float **tau, std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgeqrf_group_usm_sycl(queue, m, n, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].sgeqrf_group_usm_sycl( + queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, std::int64_t *lda, double **tau, std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgeqrf_group_usm_sycl(queue, m, n, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].dgeqrf_group_usm_sycl( + queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::complex **a, std::int64_t *lda, @@ -1720,9 +1782,9 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgeqrf_group_usm_sycl(queue, m, n, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].cgeqrf_group_usm_sycl( + queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::complex **a, std::int64_t *lda, @@ -1730,79 +1792,79 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgeqrf_group_usm_sycl(queue, m, n, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].zgeqrf_group_usm_sycl( + queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetrf_group_usm_sycl(queue, m, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].sgetrf_group_usm_sycl( + queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetrf_group_usm_sycl(queue, m, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].dgetrf_group_usm_sycl( + queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::complex **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetrf_group_usm_sycl(queue, m, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].cgetrf_group_usm_sycl( + queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::complex **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetrf_group_usm_sycl(queue, m, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].zgetrf_group_usm_sycl( + queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, float **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetri_group_usm_sycl(queue, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].sgetri_group_usm_sycl( + queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, double **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetri_group_usm_sycl(queue, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].dgetri_group_usm_sycl( + queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, std::complex **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetri_group_usm_sycl(queue, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].cgetri_group_usm_sycl( + queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, std::complex **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetri_group_usm_sycl(queue, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].zgetri_group_usm_sycl( + queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, @@ -1810,9 +1872,9 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetrs_group_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, - ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgetrs_group_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, @@ -1820,9 +1882,9 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetrs_group_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, - ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgetrs_group_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, @@ -1831,9 +1893,9 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetrs_group_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, - ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgetrs_group_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, @@ -1842,79 +1904,79 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetrs_group_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, - ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgetrs_group_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, float **a, std::int64_t *lda, float **tau, std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sorgqr_group_usm_sycl(queue, m, n, k, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].sorgqr_group_usm_sycl( + queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, double **a, std::int64_t *lda, double **tau, std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dorgqr_group_usm_sycl(queue, m, n, k, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].dorgqr_group_usm_sycl( + queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, float **a, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].spotrf_group_usm_sycl(queue, uplo, n, a, lda, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].spotrf_group_usm_sycl( + queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, double **a, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dpotrf_group_usm_sycl(queue, uplo, n, a, lda, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].dpotrf_group_usm_sycl( + queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::complex **a, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cpotrf_group_usm_sycl(queue, uplo, n, a, lda, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].cpotrf_group_usm_sycl( + queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::complex **a, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zpotrf_group_usm_sycl(queue, uplo, n, a, lda, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].zpotrf_group_usm_sycl( + queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, float **a, std::int64_t *lda, float **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].spotrs_group_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].spotrs_group_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, double **a, std::int64_t *lda, double **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dpotrs_group_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dpotrs_group_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, std::complex **a, @@ -1922,9 +1984,9 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi:: std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cpotrs_group_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cpotrs_group_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, std::complex **a, @@ -1932,9 +1994,9 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi:: std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zpotrs_group_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zpotrs_group_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex **a, @@ -1942,9 +2004,9 @@ sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cungqr_group_usm_sycl(queue, m, n, k, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].cungqr_group_usm_sycl( + queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex **a, @@ -1952,92 +2014,92 @@ sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zungqr_group_usm_sycl(queue, m, n, k, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].zungqr_group_usm_sycl( + queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } template <> std::int64_t gebrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].sgebrd_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].sgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gebrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dgebrd_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].dgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gebrd_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cgebrd_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].cgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gebrd_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zgebrd_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].zgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gerqf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].sgerqf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].sgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gerqf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dgerqf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].dgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gerqf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cgerqf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].cgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gerqf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zgerqf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].zgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t geqrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].sgeqrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].sgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t geqrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dgeqrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].dgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t geqrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cgeqrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].cgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t geqrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zgeqrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].zgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { - return function_tables[libkey].sgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, lda, ldu, - ldvt); + return function_tables[{ libkey, queue }].sgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, + lda, ldu, ldvt); } template <> std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { - return function_tables[libkey].dgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, lda, ldu, - ldvt); + return function_tables[{ libkey, queue }].dgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, + lda, ldu, ldvt); } template <> std::int64_t gesvd_scratchpad_size>(oneapi::mkl::device libkey, @@ -2046,8 +2108,8 @@ std::int64_t gesvd_scratchpad_size>(oneapi::mkl::device libk oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { - return function_tables[libkey].cgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, lda, ldu, - ldvt); + return function_tables[{ libkey, queue }].cgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, + lda, ldu, ldvt); } template <> std::int64_t gesvd_scratchpad_size>(oneapi::mkl::device libkey, @@ -2056,64 +2118,66 @@ std::int64_t gesvd_scratchpad_size>(oneapi::mkl::device lib oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { - return function_tables[libkey].zgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, lda, ldu, - ldvt); + return function_tables[{ libkey, queue }].zgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, + lda, ldu, ldvt); } template <> std::int64_t getrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].sgetrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].sgetrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t getrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dgetrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].dgetrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t getrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cgetrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].cgetrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t getrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zgetrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].zgetrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t getri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].sgetri_scratchpad_size_sycl(queue, n, lda); + return function_tables[{ libkey, queue }].sgetri_scratchpad_size_sycl(queue, n, lda); } template <> std::int64_t getri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dgetri_scratchpad_size_sycl(queue, n, lda); + return function_tables[{ libkey, queue }].dgetri_scratchpad_size_sycl(queue, n, lda); } template <> std::int64_t getri_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cgetri_scratchpad_size_sycl(queue, n, lda); + return function_tables[{ libkey, queue }].cgetri_scratchpad_size_sycl(queue, n, lda); } template <> std::int64_t getri_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zgetri_scratchpad_size_sycl(queue, n, lda); + return function_tables[{ libkey, queue }].zgetri_scratchpad_size_sycl(queue, n, lda); } template <> std::int64_t getrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].sgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].sgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, + lda, ldb); } template <> std::int64_t getrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].dgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].dgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, + lda, ldb); } template <> std::int64_t getrs_scratchpad_size>(oneapi::mkl::device libkey, @@ -2121,7 +2185,8 @@ std::int64_t getrs_scratchpad_size>(oneapi::mkl::device libk oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].cgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].cgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, + lda, ldb); } template <> std::int64_t getrs_scratchpad_size>(oneapi::mkl::device libkey, @@ -2129,21 +2194,24 @@ std::int64_t getrs_scratchpad_size>(oneapi::mkl::device lib oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].zgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].zgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, + lda, ldb); } template <> std::int64_t heevd_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cheevd_scratchpad_size_sycl(queue, jobz, uplo, n, lda); + return function_tables[{ libkey, queue }].cheevd_scratchpad_size_sycl(queue, jobz, uplo, n, + lda); } template <> std::int64_t heevd_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zheevd_scratchpad_size_sycl(queue, jobz, uplo, n, lda); + return function_tables[{ libkey, queue }].zheevd_scratchpad_size_sycl(queue, jobz, uplo, n, + lda); } template <> std::int64_t hegvd_scratchpad_size>(oneapi::mkl::device libkey, @@ -2151,8 +2219,8 @@ std::int64_t hegvd_scratchpad_size>(oneapi::mkl::device libk oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].chegvd_scratchpad_size_sycl(queue, itype, jobz, uplo, n, lda, - ldb); + return function_tables[{ libkey, queue }].chegvd_scratchpad_size_sycl(queue, itype, jobz, uplo, + n, lda, ldb); } template <> std::int64_t hegvd_scratchpad_size>(oneapi::mkl::device libkey, @@ -2160,270 +2228,278 @@ std::int64_t hegvd_scratchpad_size>(oneapi::mkl::device lib oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].zhegvd_scratchpad_size_sycl(queue, itype, jobz, uplo, n, lda, - ldb); + return function_tables[{ libkey, queue }].zhegvd_scratchpad_size_sycl(queue, itype, jobz, uplo, + n, lda, ldb); } template <> std::int64_t hetrd_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].chetrd_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].chetrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t hetrd_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zhetrd_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].zhetrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t hetrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].chetrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].chetrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t hetrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zhetrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].zhetrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t orgbr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].sorgbr_scratchpad_size_sycl(queue, vect, m, n, k, lda); + return function_tables[{ libkey, queue }].sorgbr_scratchpad_size_sycl(queue, vect, m, n, k, + lda); } template <> std::int64_t orgbr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].dorgbr_scratchpad_size_sycl(queue, vect, m, n, k, lda); + return function_tables[{ libkey, queue }].dorgbr_scratchpad_size_sycl(queue, vect, m, n, k, + lda); } template <> std::int64_t orgtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].sorgtr_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].sorgtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t orgtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dorgtr_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].dorgtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t orgqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].sorgqr_scratchpad_size_sycl(queue, m, n, k, lda); + return function_tables[{ libkey, queue }].sorgqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> std::int64_t orgqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].dorgqr_scratchpad_size_sycl(queue, m, n, k, lda); + return function_tables[{ libkey, queue }].dorgqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].sormrq_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].sormrq_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].dormrq_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].dormrq_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t ormqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].sormqr_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].sormqr_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t ormqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].dormqr_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].dormqr_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].sormtr_scratchpad_size_sycl(queue, side, uplo, trans, m, n, lda, - ldc); + return function_tables[{ libkey, queue }].sormtr_scratchpad_size_sycl(queue, side, uplo, trans, + m, n, lda, ldc); } template <> std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].dormtr_scratchpad_size_sycl(queue, side, uplo, trans, m, n, lda, - ldc); + return function_tables[{ libkey, queue }].dormtr_scratchpad_size_sycl(queue, side, uplo, trans, + m, n, lda, ldc); } template <> std::int64_t potrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].spotrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].spotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dpotrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].dpotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cpotrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].cpotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zpotrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].zpotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].spotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].spotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, + ldb); } template <> std::int64_t potrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].dpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].dpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, + ldb); } template <> std::int64_t potrs_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].cpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].cpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, + ldb); } template <> std::int64_t potrs_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].zpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].zpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, + ldb); } template <> std::int64_t potri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].spotri_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].spotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dpotri_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].dpotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potri_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cpotri_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].cpotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potri_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zpotri_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].zpotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t sytrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].ssytrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].ssytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t sytrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dsytrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].dsytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t sytrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].csytrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].csytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t sytrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zsytrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].zsytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t syevd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].ssyevd_scratchpad_size_sycl(queue, jobz, uplo, n, lda); + return function_tables[{ libkey, queue }].ssyevd_scratchpad_size_sycl(queue, jobz, uplo, n, + lda); } template <> std::int64_t syevd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dsyevd_scratchpad_size_sycl(queue, jobz, uplo, n, lda); + return function_tables[{ libkey, queue }].dsyevd_scratchpad_size_sycl(queue, jobz, uplo, n, + lda); } template <> std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].ssygvd_scratchpad_size_sycl(queue, itype, jobz, uplo, n, lda, - ldb); + return function_tables[{ libkey, queue }].ssygvd_scratchpad_size_sycl(queue, itype, jobz, uplo, + n, lda, ldb); } template <> std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].dsygvd_scratchpad_size_sycl(queue, itype, jobz, uplo, n, lda, - ldb); + return function_tables[{ libkey, queue }].dsygvd_scratchpad_size_sycl(queue, itype, jobz, uplo, + n, lda, ldb); } template <> std::int64_t sytrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].ssytrd_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].ssytrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t sytrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dsytrd_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].dsytrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t trtrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].strtrs_scratchpad_size_sycl(queue, uplo, trans, diag, n, nrhs, - lda, ldb); + return function_tables[{ libkey, queue }].strtrs_scratchpad_size_sycl(queue, uplo, trans, diag, + n, nrhs, lda, ldb); } template <> std::int64_t trtrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].dtrtrs_scratchpad_size_sycl(queue, uplo, trans, diag, n, nrhs, - lda, ldb); + return function_tables[{ libkey, queue }].dtrtrs_scratchpad_size_sycl(queue, uplo, trans, diag, + n, nrhs, lda, ldb); } template <> std::int64_t trtrs_scratchpad_size>(oneapi::mkl::device libkey, @@ -2432,8 +2508,8 @@ std::int64_t trtrs_scratchpad_size>(oneapi::mkl::device libk oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].ctrtrs_scratchpad_size_sycl(queue, uplo, trans, diag, n, nrhs, - lda, ldb); + return function_tables[{ libkey, queue }].ctrtrs_scratchpad_size_sycl(queue, uplo, trans, diag, + n, nrhs, lda, ldb); } template <> std::int64_t trtrs_scratchpad_size>(oneapi::mkl::device libkey, @@ -2442,8 +2518,8 @@ std::int64_t trtrs_scratchpad_size>(oneapi::mkl::device lib oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].ztrtrs_scratchpad_size_sycl(queue, uplo, trans, diag, n, nrhs, - lda, ldb); + return function_tables[{ libkey, queue }].ztrtrs_scratchpad_size_sycl(queue, uplo, trans, diag, + n, nrhs, lda, ldb); } template <> std::int64_t ungbr_scratchpad_size>(oneapi::mkl::device libkey, @@ -2451,7 +2527,8 @@ std::int64_t ungbr_scratchpad_size>(oneapi::mkl::device libk oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].cungbr_scratchpad_size_sycl(queue, vect, m, n, k, lda); + return function_tables[{ libkey, queue }].cungbr_scratchpad_size_sycl(queue, vect, m, n, k, + lda); } template <> std::int64_t ungbr_scratchpad_size>(oneapi::mkl::device libkey, @@ -2459,33 +2536,34 @@ std::int64_t ungbr_scratchpad_size>(oneapi::mkl::device lib oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].zungbr_scratchpad_size_sycl(queue, vect, m, n, k, lda); + return function_tables[{ libkey, queue }].zungbr_scratchpad_size_sycl(queue, vect, m, n, k, + lda); } template <> std::int64_t ungqr_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].cungqr_scratchpad_size_sycl(queue, m, n, k, lda); + return function_tables[{ libkey, queue }].cungqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> std::int64_t ungqr_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].zungqr_scratchpad_size_sycl(queue, m, n, k, lda); + return function_tables[{ libkey, queue }].zungqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> std::int64_t ungtr_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cungtr_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].cungtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t ungtr_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zungtr_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].zungtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t unmrq_scratchpad_size>(oneapi::mkl::device libkey, @@ -2494,8 +2572,8 @@ std::int64_t unmrq_scratchpad_size>(oneapi::mkl::device libk std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].cunmrq_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].cunmrq_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t unmrq_scratchpad_size>(oneapi::mkl::device libkey, @@ -2504,8 +2582,8 @@ std::int64_t unmrq_scratchpad_size>(oneapi::mkl::device lib std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].zunmrq_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].zunmrq_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t unmqr_scratchpad_size>(oneapi::mkl::device libkey, @@ -2514,8 +2592,8 @@ std::int64_t unmqr_scratchpad_size>(oneapi::mkl::device libk std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].cunmqr_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].cunmqr_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t unmqr_scratchpad_size>(oneapi::mkl::device libkey, @@ -2524,8 +2602,8 @@ std::int64_t unmqr_scratchpad_size>(oneapi::mkl::device lib std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].zunmqr_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].zunmqr_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t unmtr_scratchpad_size>(oneapi::mkl::device libkey, @@ -2534,8 +2612,8 @@ std::int64_t unmtr_scratchpad_size>(oneapi::mkl::device libk oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].cunmtr_scratchpad_size_sycl(queue, side, uplo, trans, m, n, lda, - ldc); + return function_tables[{ libkey, queue }].cunmtr_scratchpad_size_sycl(queue, side, uplo, trans, + m, n, lda, ldc); } template <> std::int64_t unmtr_scratchpad_size>(oneapi::mkl::device libkey, @@ -2544,68 +2622,68 @@ std::int64_t unmtr_scratchpad_size>(oneapi::mkl::device lib oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].zunmtr_scratchpad_size_sycl(queue, side, uplo, trans, m, n, lda, - ldc); + return function_tables[{ libkey, queue }].zunmtr_scratchpad_size_sycl(queue, side, uplo, trans, + m, n, lda, ldc); } template <> std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].sgetrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].sgetrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].dgetrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].dgetrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].cgetrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].cgetrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].zgetrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].zgetrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].sgetri_batch_scratchpad_size_sycl(queue, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].sgetri_batch_scratchpad_size_sycl( + queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].dgetri_batch_scratchpad_size_sycl(queue, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].dgetri_batch_scratchpad_size_sycl( + queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getri_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].cgetri_batch_scratchpad_size_sycl(queue, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].cgetri_batch_scratchpad_size_sycl( + queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getri_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].zgetri_batch_scratchpad_size_sycl(queue, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].zgetri_batch_scratchpad_size_sycl( + queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, @@ -2614,7 +2692,7 @@ std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].sgetrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].sgetrs_batch_scratchpad_size_sycl( queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); } template <> @@ -2624,7 +2702,7 @@ std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, syc std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].dgetrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].dgetrs_batch_scratchpad_size_sycl( queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); } template <> @@ -2632,7 +2710,7 @@ std::int64_t getrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].cgetrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].cgetrs_batch_scratchpad_size_sycl( queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); } template <> @@ -2640,7 +2718,7 @@ std::int64_t getrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].zgetrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].zgetrs_batch_scratchpad_size_sycl( queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); } template <> @@ -2648,60 +2726,60 @@ std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].sgeqrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].sgeqrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].dgeqrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].dgeqrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t geqrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].cgeqrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].cgeqrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t geqrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].zgeqrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].zgeqrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { - return function_tables[libkey].spotrf_batch_scratchpad_size_sycl(queue, uplo, n, lda, stride_a, - batch_size); + return function_tables[{ libkey, queue }].spotrf_batch_scratchpad_size_sycl( + queue, uplo, n, lda, stride_a, batch_size); } template <> std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { - return function_tables[libkey].dpotrf_batch_scratchpad_size_sycl(queue, uplo, n, lda, stride_a, - batch_size); + return function_tables[{ libkey, queue }].dpotrf_batch_scratchpad_size_sycl( + queue, uplo, n, lda, stride_a, batch_size); } template <> std::int64_t potrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { - return function_tables[libkey].cpotrf_batch_scratchpad_size_sycl(queue, uplo, n, lda, stride_a, - batch_size); + return function_tables[{ libkey, queue }].cpotrf_batch_scratchpad_size_sycl( + queue, uplo, n, lda, stride_a, batch_size); } template <> std::int64_t potrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { - return function_tables[libkey].zpotrf_batch_scratchpad_size_sycl(queue, uplo, n, lda, stride_a, - batch_size); + return function_tables[{ libkey, queue }].zpotrf_batch_scratchpad_size_sycl( + queue, uplo, n, lda, stride_a, batch_size); } template <> std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, @@ -2709,7 +2787,7 @@ std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].spotrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].spotrs_batch_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> @@ -2718,7 +2796,7 @@ std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, syc std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].dpotrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].dpotrs_batch_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> @@ -2726,7 +2804,7 @@ std::int64_t potrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].cpotrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].cpotrs_batch_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> @@ -2734,7 +2812,7 @@ std::int64_t potrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].zpotrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].zpotrs_batch_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> @@ -2742,46 +2820,46 @@ std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].sorgqr_batch_scratchpad_size_sycl(queue, m, n, k, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].sorgqr_batch_scratchpad_size_sycl( + queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].dorgqr_batch_scratchpad_size_sycl(queue, m, n, k, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].dorgqr_batch_scratchpad_size_sycl( + queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t ungqr_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].cungqr_batch_scratchpad_size_sycl(queue, m, n, k, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].cungqr_batch_scratchpad_size_sycl( + queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t ungqr_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].zungqr_batch_scratchpad_size_sycl(queue, m, n, k, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].zungqr_batch_scratchpad_size_sycl( + queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].sgetrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].sgetrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].dgetrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].dgetrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t getrf_batch_scratchpad_size>(oneapi::mkl::device libkey, @@ -2789,8 +2867,8 @@ std::int64_t getrf_batch_scratchpad_size>(oneapi::mkl::devic std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].cgetrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].cgetrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t getrf_batch_scratchpad_size>(oneapi::mkl::device libkey, @@ -2798,24 +2876,24 @@ std::int64_t getrf_batch_scratchpad_size>(oneapi::mkl::devi std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].zgetrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].zgetrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].sgetri_group_scratchpad_size_sycl(queue, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].sgetri_group_scratchpad_size_sycl( + queue, n, lda, group_count, group_sizes); } template <> std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].dgetri_group_scratchpad_size_sycl(queue, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].dgetri_group_scratchpad_size_sycl( + queue, n, lda, group_count, group_sizes); } template <> std::int64_t getri_batch_scratchpad_size>(oneapi::mkl::device libkey, @@ -2823,8 +2901,8 @@ std::int64_t getri_batch_scratchpad_size>(oneapi::mkl::devic std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].cgetri_group_scratchpad_size_sycl(queue, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].cgetri_group_scratchpad_size_sycl( + queue, n, lda, group_count, group_sizes); } template <> std::int64_t getri_batch_scratchpad_size>(oneapi::mkl::device libkey, @@ -2832,8 +2910,8 @@ std::int64_t getri_batch_scratchpad_size>(oneapi::mkl::devi std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].zgetri_group_scratchpad_size_sycl(queue, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].zgetri_group_scratchpad_size_sycl( + queue, n, lda, group_count, group_sizes); } template <> std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, @@ -2841,8 +2919,8 @@ std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].sgetrs_group_scratchpad_size_sycl(queue, trans, n, nrhs, lda, - ldb, group_count, group_sizes); + return function_tables[{ libkey, queue }].sgetrs_group_scratchpad_size_sycl( + queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, @@ -2850,40 +2928,40 @@ std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, syc std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].dgetrs_group_scratchpad_size_sycl(queue, trans, n, nrhs, lda, - ldb, group_count, group_sizes); + return function_tables[{ libkey, queue }].dgetrs_group_scratchpad_size_sycl( + queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t getrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].cgetrs_group_scratchpad_size_sycl(queue, trans, n, nrhs, lda, - ldb, group_count, group_sizes); + return function_tables[{ libkey, queue }].cgetrs_group_scratchpad_size_sycl( + queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t getrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].zgetrs_group_scratchpad_size_sycl(queue, trans, n, nrhs, lda, - ldb, group_count, group_sizes); + return function_tables[{ libkey, queue }].zgetrs_group_scratchpad_size_sycl( + queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].sgeqrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].sgeqrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].dgeqrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].dgeqrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t geqrf_batch_scratchpad_size>(oneapi::mkl::device libkey, @@ -2891,8 +2969,8 @@ std::int64_t geqrf_batch_scratchpad_size>(oneapi::mkl::devic std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].cgeqrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].cgeqrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t geqrf_batch_scratchpad_size>(oneapi::mkl::device libkey, @@ -2900,54 +2978,54 @@ std::int64_t geqrf_batch_scratchpad_size>(oneapi::mkl::devi std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].zgeqrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].zgeqrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].sorgqr_group_scratchpad_size_sycl(queue, m, n, k, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].sorgqr_group_scratchpad_size_sycl( + queue, m, n, k, lda, group_count, group_sizes); } template <> std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].dorgqr_group_scratchpad_size_sycl(queue, m, n, k, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].dorgqr_group_scratchpad_size_sycl( + queue, m, n, k, lda, group_count, group_sizes); } template <> std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].spotrf_group_scratchpad_size_sycl(queue, uplo, n, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].spotrf_group_scratchpad_size_sycl( + queue, uplo, n, lda, group_count, group_sizes); } template <> std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].dpotrf_group_scratchpad_size_sycl(queue, uplo, n, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].dpotrf_group_scratchpad_size_sycl( + queue, uplo, n, lda, group_count, group_sizes); } template <> std::int64_t potrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].cpotrf_group_scratchpad_size_sycl(queue, uplo, n, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].cpotrf_group_scratchpad_size_sycl( + queue, uplo, n, lda, group_count, group_sizes); } template <> std::int64_t potrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].zpotrf_group_scratchpad_size_sycl(queue, uplo, n, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].zpotrf_group_scratchpad_size_sycl( + queue, uplo, n, lda, group_count, group_sizes); } template <> std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, @@ -2955,8 +3033,8 @@ std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].spotrs_group_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb, - group_count, group_sizes); + return function_tables[{ libkey, queue }].spotrs_group_scratchpad_size_sycl( + queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, @@ -2964,38 +3042,38 @@ std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, syc std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].dpotrs_group_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb, - group_count, group_sizes); + return function_tables[{ libkey, queue }].dpotrs_group_scratchpad_size_sycl( + queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t potrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].cpotrs_group_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb, - group_count, group_sizes); + return function_tables[{ libkey, queue }].cpotrs_group_scratchpad_size_sycl( + queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t potrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].zpotrs_group_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb, - group_count, group_sizes); + return function_tables[{ libkey, queue }].zpotrs_group_scratchpad_size_sycl( + queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t ungqr_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].cungqr_group_scratchpad_size_sycl(queue, m, n, k, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].cungqr_group_scratchpad_size_sycl( + queue, m, n, k, lda, group_count, group_sizes); } template <> std::int64_t ungqr_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].zungqr_group_scratchpad_size_sycl(queue, m, n, k, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].zungqr_group_scratchpad_size_sycl( + queue, m, n, k, lda, group_count, group_sizes); } } //namespace detail diff --git a/src/rng/rng_loader.cpp b/src/rng/rng_loader.cpp index 1734287ee..68e3a5ba5 100644 --- a/src/rng/rng_loader.cpp +++ b/src/rng/rng_loader.cpp @@ -31,21 +31,21 @@ static oneapi::mkl::detail::table_initializer engine_impl* create_philox4x32x10(oneapi::mkl::device libkey, sycl::queue queue, std::uint64_t seed) { - return function_tables[libkey].create_philox4x32x10_sycl(queue, seed); + return function_tables[{ libkey, queue }].create_philox4x32x10_sycl(queue, seed); } engine_impl* create_philox4x32x10(oneapi::mkl::device libkey, sycl::queue queue, std::initializer_list seed) { - return function_tables[libkey].create_philox4x32x10_ex_sycl(queue, seed); + return function_tables[{ libkey, queue }].create_philox4x32x10_ex_sycl(queue, seed); } engine_impl* create_mrg32k3a(oneapi::mkl::device libkey, sycl::queue queue, std::uint32_t seed) { - return function_tables[libkey].create_mrg32k3a_sycl(queue, seed); + return function_tables[{ libkey, queue }].create_mrg32k3a_sycl(queue, seed); } engine_impl* create_mrg32k3a(oneapi::mkl::device libkey, sycl::queue queue, std::initializer_list seed) { - return function_tables[libkey].create_mrg32k3a_ex_sycl(queue, seed); + return function_tables[{ libkey, queue }].create_mrg32k3a_ex_sycl(queue, seed); } } // namespace detail diff --git a/tests/unit_tests/main_test.cpp b/tests/unit_tests/main_test.cpp index bac3f8c83..7a20e0ed8 100644 --- a/tests/unit_tests/main_test.cpp +++ b/tests/unit_tests/main_test.cpp @@ -101,17 +101,19 @@ int main(int argc, char** argv) { auto plat_devs = plat.get_devices(); for (auto dev : plat_devs) { try { - /* Do not test for OpenCL backend on GPU */ - if (dev.is_gpu() && plat.get_info().find( - "OpenCL") != std::string::npos) + unsigned int vendor_id = + static_cast(dev.get_info()); + /* Do not test for OpenCL backend on Intel GPU */ + if (dev.is_gpu() && + plat.get_info().find("OpenCL") != + std::string::npos && + vendor_id == INTEL_ID) continue; if (unique_devices.find(dev.get_info()) == unique_devices.end()) { unique_devices.insert(dev.get_info()); - unsigned int vendor_id = static_cast( - dev.get_info()); #if !defined(ENABLE_MKLCPU_BACKEND) && !defined(ENABLE_PORTBLAS_BACKEND_INTEL_CPU) && \ - !defined(ENABLE_PORTFFT_BACKEND) + !defined(ENABLE_PORTFFT_BACKEND) && !defined(ENABLE_NETLIB_BACKEND) if (dev.is_cpu()) continue; #endif @@ -151,14 +153,6 @@ int main(int argc, char** argv) { #endif } -#if defined(ENABLE_MKLCPU_BACKEND) || defined(ENABLE_NETLIB_BACKEND) || \ - defined(ENABLE_PORTBLAS_BACKEND_INTEL_CPU) -#ifdef __HIPSYCL__ - local_devices.push_back(sycl::device(sycl::cpu_selector())); -#else - local_devices.push_back(sycl::device(sycl::cpu_selector_v)); -#endif -#endif #define GET_NAME(d) (d).template get_info() for (auto& local_dev : local_devices) { // Test only unique devices