From 44bb0680455420239ce3499dc22d8b2ceaa9eb19 Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Mon, 13 Jan 2025 22:15:20 +0100 Subject: [PATCH] Remove legacy correlate, dot, and multiply from backend (#2183) Removing `dot` and `multiply` legacy implementations and related legacy utilities from backend as no longer needed. --- dpnp/backend/CMakeLists.txt | 2 - .../include/dpnp_gen_2arg_3type_tbl.hpp | 121 ---- dpnp/backend/include/dpnp_iface.hpp | 142 ----- dpnp/backend/include/dpnp_iface_fptr.hpp | 20 +- dpnp/backend/kernels/dpnp_krnl_common.cpp | 510 ----------------- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 517 ------------------ dpnp/backend/kernels/dpnp_krnl_statistics.cpp | 200 ------- dpnp/backend/src/constants.cpp | 43 -- dpnp/backend/src/constants.hpp | 54 -- dpnp/backend/src/dpnp_fptr.hpp | 7 - dpnp/backend/src/dpnp_iface_fptr.cpp | 21 - dpnp/backend/src/dpnp_utils.hpp | 31 -- dpnp/dpnp_algo/CMakeLists.txt | 1 - dpnp/dpnp_algo/dpnp_algo.pxd | 25 - dpnp/dpnp_algo/dpnp_algo.pyx | 18 - dpnp/dpnp_algo/dpnp_algo_statistics.pxi | 84 --- 16 files changed, 7 insertions(+), 1789 deletions(-) delete mode 100644 dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp delete mode 100644 dpnp/backend/kernels/dpnp_krnl_statistics.cpp delete mode 100644 dpnp/backend/src/constants.cpp delete mode 100644 dpnp/backend/src/constants.hpp delete mode 100644 dpnp/dpnp_algo/dpnp_algo_statistics.pxi diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt index 1e53fac6c6a..da11cc5f026 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt @@ -31,8 +31,6 @@ set(DPNP_SRC kernels/dpnp_krnl_mathematical.cpp kernels/dpnp_krnl_random.cpp kernels/dpnp_krnl_sorting.cpp - kernels/dpnp_krnl_statistics.cpp - src/constants.cpp src/dpnp_iface_fptr.cpp src/memory_sycl.cpp src/queue_sycl.cpp diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp deleted file mode 100644 index 3e987fb83ed..00000000000 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ /dev/null @@ -1,121 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2016-2025, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -/* - * This header file contains single argument element wise functions definitions - * - * Macro `MACRO_2ARG_3TYPES_OP` must be defined before usage - * - * Parameters: - * - public name of the function and kernel name - * - operation used to calculate the result - * - vector operation over SYCL group used to calculate the result - * - list of types vector operation accepts - * - mkl operation used to calculate the result - * - list of types OneMKL operation accepts - * - */ - -#ifndef MACRO_2ARG_3TYPES_OP -#error "MACRO_2ARG_3TYPES_OP is not defined" -#endif - -#ifdef _SECTION_DOCUMENTATION_GENERATION_ - -#define MACRO_2ARG_3TYPES_OP(__name__, __operation__, __vec_operation__, \ - __vec_types__, __mkl_operation__, __mkl_types__) \ - /** @ingroup BACKEND_API */ \ - /** @brief Per element operation function __name__ */ \ - /** */ \ - /** Function "__name__" executes operator "__operation__" over \ - * corresponding elements of input arrays */ \ - /** */ \ - /** @param[in] q_ref Reference to SYCL queue. */ \ - /** @param[out] result_out Output array. */ \ - /** @param[in] result_size Output array size. */ \ - /** @param[in] result_ndim Number of output array dimensions. \ - */ \ - /** @param[in] result_shape Output array shape. */ \ - /** @param[in] result_strides Output array strides. */ \ - /** @param[in] input1_in Input array 1. */ \ - /** @param[in] input1_size Input array 1 size. */ \ - /** @param[in] input1_ndim Number of input array 1 dimensions. \ - */ \ - /** @param[in] input1_shape Input array 1 shape. */ \ - /** @param[in] input1_strides Input array 1 strides. */ \ - /** @param[in] input2_in Input array 2. */ \ - /** @param[in] input2_size Input array 2 size. */ \ - /** @param[in] input2_ndim Number of input array 2 dimensions. \ - */ \ - /** @param[in] input2_shape Input array 2 shape. */ \ - /** @param[in] input2_strides Input array 2 strides. */ \ - /** @param[in] where Where condition. */ \ - /** @param[in] dep_event_vec_ref Reference to vector of SYCL events. \ - */ \ - template \ - DPCTLSyclEventRef __name__( \ - DPCTLSyclQueueRef q_ref, void *result_out, const size_t result_size, \ - const size_t result_ndim, const shape_elem_type *result_shape, \ - const shape_elem_type *result_strides, const void *input1_in, \ - const size_t input1_size, const size_t input1_ndim, \ - const shape_elem_type *input1_shape, \ - const shape_elem_type *input1_strides, const void *input2_in, \ - const size_t input2_size, const size_t input2_ndim, \ - const shape_elem_type *input2_shape, \ - const shape_elem_type *input2_strides, const size_t *where, \ - const DPCTLEventVectorRef dep_event_vec_ref); \ - \ - template \ - void __name__( \ - void *result_out, const size_t result_size, const size_t result_ndim, \ - const shape_elem_type *result_shape, \ - const shape_elem_type *result_strides, const void *input1_in, \ - const size_t input1_size, const size_t input1_ndim, \ - const shape_elem_type *input1_shape, \ - const shape_elem_type *input1_strides, const void *input2_in, \ - const size_t input2_size, const size_t input2_ndim, \ - const shape_elem_type *input2_shape, \ - const shape_elem_type *input2_strides, const size_t *where) - -#endif - -// "multiply" needs to be standalone kernel (not autogenerated) due to complex -// algorithm. This is not an element wise. pytest -// "tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid3" -// requires multiplication shape1[10] with shape2[10,1] and result expected as -// shape[10,10] -MACRO_2ARG_3TYPES_OP(dpnp_multiply_c, - input1_elem *input2_elem, - x1 *x2, - MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), - oneapi::mkl::vm::mul, - MACRO_UNPACK_TYPES(float, - double, - std::complex, - std::complex)) - -#undef MACRO_2ARG_3TYPES_OP diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp index d0852886319..1269ac77095 100644 --- a/dpnp/backend/include/dpnp_iface.hpp +++ b/dpnp/backend/include/dpnp_iface.hpp @@ -100,69 +100,6 @@ INP_DLLEXPORT void dpnp_memory_memcpy_c(DPCTLSyclQueueRef q_ref, INP_DLLEXPORT void dpnp_memory_memcpy_c(void *dst, const void *src, size_t size_in_bytes); -/** - * @ingroup BACKEND_API - * @brief Custom implementation of dot function - * - * @param [in] q_ref Reference to SYCL queue. - * @param [out] result_out Output array. - * @param [in] result_size Size of output array. - * @param [in] result_ndim Number of output array dimensions. - * @param [in] result_shape Shape of output array. - * @param [in] result_strides Strides of output array. - * @param [in] input1_in First input array. - * @param [in] input1_size Size of first input array. - * @param [in] input1_ndim Number of first input array dimensions. - * @param [in] input1_shape Shape of first input array. - * @param [in] input1_strides Strides of first input array. - * @param [in] input2_in Second input array. - * @param [in] input2_size Size of second input array. - * @param [in] input2_ndim Number of second input array dimensions. - * @param [in] input2_shape Shape of second input array. - * @param [in] input2_strides Strides of second input array. - * @param [in] dep_event_vec_ref Reference to vector of SYCL events. - */ -template -INP_DLLEXPORT DPCTLSyclEventRef - dpnp_dot_c(DPCTLSyclQueueRef q_ref, - void *result_out, - const size_t result_size, - const size_t result_ndim, - const shape_elem_type *result_shape, - const shape_elem_type *result_strides, - const void *input1_in, - const size_t input1_size, - const size_t input1_ndim, - const shape_elem_type *input1_shape, - const shape_elem_type *input1_strides, - const void *input2_in, - const size_t input2_size, - const size_t input2_ndim, - const shape_elem_type *input2_shape, - const shape_elem_type *input2_strides, - const DPCTLEventVectorRef dep_event_vec_ref); - -template -INP_DLLEXPORT void dpnp_dot_c(void *result_out, - const size_t result_size, - const size_t result_ndim, - const shape_elem_type *result_shape, - const shape_elem_type *result_strides, - const void *input1_in, - const size_t input1_size, - const size_t input1_ndim, - const shape_elem_type *input1_shape, - const shape_elem_type *input1_strides, - const void *input2_in, - const size_t input2_size, - const size_t input2_ndim, - const shape_elem_type *input2_shape, - const shape_elem_type *input2_strides); - /** * @ingroup BACKEND_API * @brief Return a partitioned copy of an array. @@ -195,54 +132,6 @@ INP_DLLEXPORT void dpnp_partition_c(void *array, const shape_elem_type *shape, const size_t ndim); -/** - * @ingroup BACKEND_API - * @brief correlate function - * - * @param [in] q_ref Reference to SYCL queue. - * @param [out] result_out Output array. - * @param [in] input1_in First input array. - * @param [in] input1_size Size of first input array. - * @param [in] input1_shape Shape of first input array. - * @param [in] input1_shape_ndim Number of first array dimensions. - * @param [in] input2_in Second input array. - * @param [in] input2_size Shape of second input array. - * @param [in] input2_shape Shape of first input array. - * @param [in] input2_shape_ndim Number of second array dimensions. - * @param [in] where Mask array. - * @param [in] dep_event_vec_ref Reference to vector of SYCL events. - */ -template -INP_DLLEXPORT DPCTLSyclEventRef - dpnp_correlate_c(DPCTLSyclQueueRef q_ref, - void *result_out, - const void *input1_in, - const size_t input1_size, - const shape_elem_type *input1_shape, - const size_t input1_shape_ndim, - const void *input2_in, - const size_t input2_size, - const shape_elem_type *input2_shape, - const size_t input2_shape_ndim, - const size_t *where, - const DPCTLEventVectorRef dep_event_vec_ref); - -template -INP_DLLEXPORT void dpnp_correlate_c(void *result_out, - const void *input1_in, - const size_t input1_size, - const shape_elem_type *input1_shape, - const size_t input1_shape_ndim, - const void *input2_in, - const size_t input2_size, - const shape_elem_type *input2_shape, - const size_t input2_shape_ndim, - const size_t *where); - /** * @ingroup BACKEND_API * @brief Construct an array from an index array and a list of arrays to choose @@ -319,37 +208,6 @@ INP_DLLEXPORT void dpnp_initval_c(void *result1, void *value, size_t size); #include -#define MACRO_2ARG_3TYPES_OP(__name__, __operation__, __vec_operation__, \ - __vec_types__, __mkl_operation__, __mkl_types__) \ - template \ - INP_DLLEXPORT DPCTLSyclEventRef __name__( \ - DPCTLSyclQueueRef q_ref, void *result_out, const size_t result_size, \ - const size_t result_ndim, const shape_elem_type *result_shape, \ - const shape_elem_type *result_strides, const void *input1_in, \ - const size_t input1_size, const size_t input1_ndim, \ - const shape_elem_type *input1_shape, \ - const shape_elem_type *input1_strides, const void *input2_in, \ - const size_t input2_size, const size_t input2_ndim, \ - const shape_elem_type *input2_shape, \ - const shape_elem_type *input2_strides, const size_t *where, \ - const DPCTLEventVectorRef dep_event_vec_ref); \ - \ - template \ - INP_DLLEXPORT void __name__( \ - void *result_out, const size_t result_size, const size_t result_ndim, \ - const shape_elem_type *result_shape, \ - const shape_elem_type *result_strides, const void *input1_in, \ - const size_t input1_size, const size_t input1_ndim, \ - const shape_elem_type *input1_shape, \ - const shape_elem_type *input1_strides, const void *input2_in, \ - const size_t input2_size, const size_t input2_ndim, \ - const shape_elem_type *input2_shape, \ - const shape_elem_type *input2_strides, const size_t *where); - -#include - /** * @ingroup BACKEND_API * @brief modf function. diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 701277c818c..21df2c35dc1 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -58,18 +58,13 @@ */ enum class DPNPFuncName : size_t { - DPNP_FN_NONE, /**< Very first element of the enumeration */ - DPNP_FN_CHOOSE, /**< Used in numpy.choose() impl */ - DPNP_FN_CHOOSE_EXT, /**< Used in numpy.choose() impl, requires extra - parameters */ - DPNP_FN_CORRELATE, /**< Used in numpy.correlate() impl */ - DPNP_FN_CORRELATE_EXT, /**< Used in numpy.correlate() impl, requires extra - parameters */ - DPNP_FN_DOT, /**< Used in numpy.dot() impl */ - DPNP_FN_DOT_EXT, /**< Used in numpy.dot() impl, requires extra parameters */ - DPNP_FN_ERF, /**< Used in scipy.special.erf impl */ - DPNP_FN_ERF_EXT, /**< Used in scipy.special.erf impl, requires extra - parameters */ + DPNP_FN_NONE, /**< Very first element of the enumeration */ + DPNP_FN_CHOOSE, /**< Used in numpy.choose() impl */ + DPNP_FN_CHOOSE_EXT, /**< Used in numpy.choose() impl, requires extra + parameters */ + DPNP_FN_ERF, /**< Used in scipy.special.erf impl */ + DPNP_FN_ERF_EXT, /**< Used in scipy.special.erf impl, requires extra + parameters */ DPNP_FN_INITVAL, /**< Used in numpy ones, ones_like, zeros, zeros_like impls */ DPNP_FN_INITVAL_EXT, /**< Used in numpy ones, ones_like, zeros, zeros_like @@ -77,7 +72,6 @@ enum class DPNPFuncName : size_t DPNP_FN_MODF, /**< Used in numpy.modf() impl */ DPNP_FN_MODF_EXT, /**< Used in numpy.modf() impl, requires extra parameters */ - DPNP_FN_MULTIPLY, /**< Used in numpy.multiply() impl */ DPNP_FN_ONES, /**< Used in numpy.ones() impl */ DPNP_FN_ONES_LIKE, /**< Used in numpy.ones_like() impl */ DPNP_FN_PARTITION, /**< Used in numpy.partition() impl */ diff --git a/dpnp/backend/kernels/dpnp_krnl_common.cpp b/dpnp/backend/kernels/dpnp_krnl_common.cpp index ee78c560a55..017af8a8c45 100644 --- a/dpnp/backend/kernels/dpnp_krnl_common.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_common.cpp @@ -35,448 +35,6 @@ #include "queue_sycl.hpp" #include -/** - * Version of SYCL DPC++ 2025.1 compiler where support of - * sycl::ext::oneapi::experimental::properties was added. - */ -#ifndef __SYCL_COMPILER_REDUCTION_PROPERTIES_SUPPORT -#define __SYCL_COMPILER_REDUCTION_PROPERTIES_SUPPORT 20241208L -#endif - -namespace mkl_blas = oneapi::mkl::blas; -namespace mkl_blas_cm = oneapi::mkl::blas::column_major; -namespace mkl_blas_rm = oneapi::mkl::blas::row_major; -namespace mkl_lapack = oneapi::mkl::lapack; - -#if __SYCL_COMPILER_VERSION >= __SYCL_COMPILER_REDUCTION_PROPERTIES_SUPPORT -namespace syclex = sycl::ext::oneapi::experimental; -#endif - -template -class dpnp_dot_c_kernel; - -template -sycl::event dot(sycl::queue &queue, - _DataType_output *result_out, - _DataType_input1 *input1_in, - _DataType_input2 *input2_in, - size_t input1_strides, - size_t input2_strides, - size_t size, - const std::vector &dependencies = {}) -{ - (void)dependencies; - - sycl::event event; - - if constexpr ((std::is_same<_DataType_input1, double>::value || - std::is_same<_DataType_input1, float>::value) && - std::is_same<_DataType_input2, _DataType_input1>::value && - std::is_same<_DataType_output, _DataType_input1>::value) - { - event = oneapi::mkl::blas::dot(queue, size, input1_in, - input1_strides, // input1 stride - input2_in, - input2_strides, // input2 stride - result_out); - } - else { -#if LIBSYCL_VERSION_GREATER(5, 3, 0) - event = queue.submit([&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::range<1>{size}, - sycl::reduction( - result_out, sycl::plus<_DataType_output>(), -#if __SYCL_COMPILER_VERSION >= __SYCL_COMPILER_REDUCTION_PROPERTIES_SUPPORT - syclex::properties(syclex::initialize_to_identity) -#else - sycl::property::reduction::initialize_to_identity {} -#endif - ), - [=](sycl::id<1> idx, auto &sum) { - sum += static_cast<_DataType_output>( - input1_in[idx * input1_strides]) * - static_cast<_DataType_output>( - input2_in[idx * input2_strides]); - }); - }); - // for some reason few such kernels cannot work in parallel - // looks like a bug in level0 because with opencl works fine - // that is why we call wait here - event.wait(); -#else - _DataType_output *local_mem = reinterpret_cast<_DataType_output *>( - sycl::malloc_shared(size * sizeof(_DataType_output), queue)); - - // what about reduction?? - sycl::range<1> gws(size); - - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { - const size_t index = global_id[0]; - local_mem[index] = input1_in[index * input1_strides] * - input2_in[index * input2_strides]; - }; - - auto kernel_func = [&](sycl::handler &cgh) { - cgh.parallel_for>( - gws, kernel_parallel_for_func); - }; - - event = queue.submit(kernel_func); - - event.wait(); - - auto policy = - oneapi::dpl::execution::make_device_policy>(queue); - - _DataType_output accumulator = 0; - accumulator = - std::reduce(policy, local_mem, local_mem + size, - _DataType_output(0), std::plus<_DataType_output>()); - policy.queue().wait(); - - queue.memcpy(result_out, &accumulator, sizeof(_DataType_output)).wait(); - - sycl::free(local_mem, queue); -#endif - } - return event; -} - -template -DPCTLSyclEventRef dpnp_dot_c(DPCTLSyclQueueRef q_ref, - void *result_out, - const size_t result_size, - const size_t result_ndim, - const shape_elem_type *result_shape, - const shape_elem_type *result_strides, - const void *input1_in, - const size_t input1_size, - const size_t input1_ndim, - const shape_elem_type *input1_shape, - const shape_elem_type *input1_strides, - const void *input2_in, - const size_t input2_size, - const size_t input2_ndim, - const shape_elem_type *input2_shape, - const shape_elem_type *input2_strides, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - sycl::queue q = *(reinterpret_cast(q_ref)); - - _DataType_input1 *input1 = - static_cast<_DataType_input1 *>(const_cast(input1_in)); - _DataType_input2 *input2 = - static_cast<_DataType_input2 *>(const_cast(input2_in)); - _DataType_output *result = reinterpret_cast<_DataType_output *>(result_out); - - if (!input1_size || !input2_size) { - _DataType_output val = _DataType_output(0); - dpnp_initval_c<_DataType_output>(result, &val, result_size); - return event_ref; - } - - // scalar - if ((input1_ndim == 0) || (input2_ndim == 0)) { - // there is no support of strides in multiply function - // so result can be wrong if input array has non-standard (c-contiguous) - // strides - dpnp_multiply_c<_DataType_output, _DataType_input1, _DataType_input2>( - result, result_size, result_ndim, result_shape, result_strides, - input1_in, input1_size, input1_ndim, input1_shape, input1_strides, - input2_in, input2_size, input2_ndim, input2_shape, input2_strides, - NULL); - return event_ref; - } - - // if both arrays are vectors - if ((input1_ndim == 1) && (input2_ndim == 1)) { - assert(input1_size == input2_size); - - sycl::event event = dot(q, result, input1, input2, input1_strides[0], - input2_strides[0], input1_size); - - event_ref = reinterpret_cast(&event); - return DPCTLEvent_Copy(event_ref); - } - - // 1D vector - size_t ext_input1_ndim = input1_ndim == 1 ? 2 : input1_ndim; - shape_elem_type *ext_input1_shape = new shape_elem_type[ext_input1_ndim]; - shape_elem_type *ext_input1_strides = new shape_elem_type[ext_input1_ndim]; - if (input1_ndim == 1) { - ext_input1_shape[0] = 1; - ext_input1_shape[1] = input1_shape[0]; - ext_input1_strides[0] = 0; - ext_input1_strides[1] = input1_strides[0]; - } - else { - for (size_t i = 0; i < ext_input1_ndim; ++i) { - ext_input1_shape[i] = input1_shape[i]; - ext_input1_strides[i] = input1_strides[i]; - } - } - size_t ext_input2_ndim = input2_ndim == 1 ? 2 : input2_ndim; - shape_elem_type *ext_input2_shape = new shape_elem_type[ext_input2_ndim]; - shape_elem_type *ext_input2_strides = new shape_elem_type[ext_input2_ndim]; - if (input2_ndim == 1) { - ext_input2_shape[0] = input2_shape[0]; - ext_input2_shape[1] = 1; - ext_input2_strides[0] = input2_strides[0]; - ext_input2_strides[1] = 0; - } - else { - for (size_t i = 0; i < ext_input2_ndim; ++i) { - ext_input2_shape[i] = input2_shape[i]; - ext_input2_strides[i] = input2_strides[i]; - } - } - size_t ext_result_ndim = - ((input1_ndim == 1) || (input2_ndim == 1)) ? 2 : result_ndim; - shape_elem_type *ext_result_shape = new shape_elem_type[ext_result_ndim]; - shape_elem_type *ext_result_strides = new shape_elem_type[ext_result_ndim]; - if ((input1_ndim == 1) || (input2_ndim == 1)) { - ext_result_shape[0] = ext_input1_shape[0]; - ext_result_shape[1] = ext_input2_shape[1]; - ext_result_strides[0] = 0; - ext_result_strides[1] = result_strides[0]; - } - else { - for (size_t i = 0; i < ext_result_ndim; ++i) { - ext_result_shape[i] = result_shape[i]; - ext_result_strides[i] = result_strides[i]; - } - } - - // check if GEMM can be executed (types) - if constexpr ((std::is_same<_DataType_input1, double>::value || - std::is_same<_DataType_input1, float>::value) && - std::is_same<_DataType_input2, _DataType_input1>::value && - std::is_same<_DataType_output, _DataType_input1>::value) - { - // check if GEMM can be executed (strides) - // TODO: rewrite the condition in general case for ndims > 2 - // (looks like there are such another cases) - if (ext_input1_ndim == 2 && ext_input2_ndim == 2) { - // OneMKL gemm supports only arrays contiguous on inner dimension, - // so stride for at least one dimension should be equal to 1 - if ((ext_input1_strides[0] == 1 || ext_input1_strides[1] == 1) && - (ext_input2_strides[0] == 1 || ext_input2_strides[1] == 1) && - (ext_result_strides[0] == 1 || ext_result_strides[1] == 1)) - { - const bool isRowmA = - (ext_input1_strides[1] == 1 || ext_input1_strides[0] == 0); - const bool isRowmB = - (ext_input2_strides[1] == 1 || ext_input2_strides[1] == 0); - const bool isRowmC = - (ext_result_strides[1] == 1 || ext_result_strides[0] == 0); - - oneapi::mkl::transpose transA = - (isRowmA != isRowmC) ? oneapi::mkl::transpose::trans - : oneapi::mkl::transpose::nontrans; - oneapi::mkl::transpose transB = - (isRowmB != isRowmC) ? oneapi::mkl::transpose::trans - : oneapi::mkl::transpose::nontrans; - - const size_t size_m = ext_input1_shape[0]; - const size_t size_n = ext_input2_shape[1]; - const size_t size_k = ext_input1_shape[1]; - - auto getLdaLdc = [](const bool isRown, shape_elem_type *strides, - shape_elem_type *shapes) { - if (isRown) { - return (strides[0] != 0) ? strides[0] : shapes[1]; - } - return strides[1]; - }; - - const std::int64_t lda = static_cast( - getLdaLdc(isRowmA, ext_input1_strides, ext_input1_shape)); - const std::int64_t ldb = static_cast( - isRowmB ? ext_input2_strides[0] : ext_input2_strides[1]); - const std::int64_t ldc = static_cast( - getLdaLdc(isRowmC, ext_result_strides, ext_result_shape)); - - constexpr _DataType_output alpha = 1; - constexpr _DataType_output beta = 0; - - std::stringstream error_msg; - std::int64_t info = 0; - - try { - if (isRowmC) { - mkl_blas_rm::gemm(q, transA, transB, size_m, size_n, - size_k, alpha, input1, lda, input2, - ldb, beta, result, ldc) - .wait(); - } - else { - mkl_blas_cm::gemm(q, transA, transB, size_m, size_n, - size_k, alpha, input1, lda, input2, - ldb, beta, result, ldc) - .wait(); - } - } catch (mkl_lapack::exception const &e) { - error_msg << "Unexpected MKL exception caught during " - "gemm() call:\nreason: " - << e.what() << "\ninfo: " << e.info(); - info = e.info(); - } catch (const std::exception &e) { - error_msg << "Unexpected SYCL exception caught during " - "gemm() call:\n" - << e.what(); - info = -1; - } - - if (info != 0) // an unexpected error occurs - { - throw std::runtime_error(error_msg.str()); - } - - delete[] ext_input1_shape; - delete[] ext_input1_strides; - delete[] ext_input2_shape; - delete[] ext_input2_strides; - delete[] ext_result_shape; - delete[] ext_result_strides; - return event_ref; - } - } - } - - std::vector dot_events; - dot_events.reserve(result_size); - - size_t dot_st1 = ext_input1_strides[ext_input1_ndim - 1]; - size_t dot_st2 = ext_input2_strides[ext_input2_ndim - 2]; - size_t dot_size = ext_input1_shape[ext_input1_ndim - 1]; - - shape_elem_type *res_coords = new shape_elem_type[ext_result_ndim]; - shape_elem_type *result_offsets = new shape_elem_type[ext_result_ndim]; - get_shape_offsets_inkernel(ext_result_shape, ext_result_ndim, - result_offsets); - - for (size_t i = 0; i < result_size; ++i) { - get_xyz_by_id(i, ext_result_ndim, result_offsets, res_coords); - - _DataType_output *dot_res = result + i; - - _DataType_input1 *dot_in1 = input1; - for (size_t j = 0; j < ext_input1_ndim - 1; ++j) { - dot_in1 = dot_in1 + res_coords[j] * ext_input1_strides[j]; - } - - _DataType_input2 *dot_in2 = input2; - for (size_t j = 0; j < ext_input2_ndim - 2; ++j) { - dot_in2 = dot_in2 + res_coords[ext_input1_ndim - 1 + j] * - ext_input2_strides[j]; - } - dot_in2 = dot_in2 + res_coords[ext_input1_ndim + ext_input2_ndim - 3] * - ext_input2_strides[ext_input2_ndim - 1]; - - dot_events.push_back( - dot(q, dot_res, dot_in1, dot_in2, dot_st1, dot_st2, dot_size)); - } - - sycl::event::wait(dot_events); - - delete[] res_coords; - delete[] result_offsets; - delete[] ext_input1_shape; - delete[] ext_input1_strides; - delete[] ext_input2_shape; - delete[] ext_input2_strides; - delete[] ext_result_shape; - delete[] ext_result_strides; - - return event_ref; -} - -template -void dpnp_dot_c(void *result_out, - const size_t result_size, - const size_t result_ndim, - const shape_elem_type *result_shape, - const shape_elem_type *result_strides, - const void *input1_in, - const size_t input1_size, - const size_t input1_ndim, - const shape_elem_type *input1_shape, - const shape_elem_type *input1_strides, - const void *input2_in, - const size_t input2_size, - const size_t input2_ndim, - const shape_elem_type *input2_shape, - const shape_elem_type *input2_strides) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = - dpnp_dot_c<_DataType_output, _DataType_input1, _DataType_input2>( - q_ref, result_out, result_size, result_ndim, result_shape, - result_strides, input1_in, input1_size, input1_ndim, input1_shape, - input1_strides, input2_in, input2_size, input2_ndim, input2_shape, - input2_strides, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_dot_default_c)(void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *, - const void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *, - const void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *) = - dpnp_dot_c<_DataType_output, _DataType_input1, _DataType_input2>; - -template -DPCTLSyclEventRef (*dpnp_dot_ext_c)(DPCTLSyclQueueRef, - void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *, - const void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *, - const void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *, - const DPCTLEventVectorRef) = - dpnp_dot_c<_DataType_output, _DataType_input1, _DataType_input2>; - template class dpnp_initval_c_kernel; @@ -533,74 +91,6 @@ DPCTLSyclEventRef (*dpnp_initval_ext_c)(DPCTLSyclQueueRef, void func_map_init_linalg(func_map_t &fmap) { - - fmap[DPNPFuncName::DPNP_FN_DOT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_INT][eft_FLT] = { - eft_DBL, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_LNG][eft_INT] = { - eft_LNG, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_LNG][eft_FLT] = { - eft_DBL, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_FLT][eft_INT] = { - eft_DBL, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_FLT][eft_LNG] = { - eft_DBL, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_DBL][eft_INT] = { - eft_DBL, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_DBL][eft_LNG] = { - eft_DBL, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_DBL][eft_FLT] = { - eft_DBL, (void *)dpnp_dot_default_c}; - fmap[DPNPFuncName::DPNP_FN_DOT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_dot_default_c}; - - // needed for "dpnp_correlate_c" function in dpnp_krnl_statistics.cpp - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_INT][eft_FLT] = { - eft_DBL, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_LNG][eft_INT] = { - eft_LNG, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_LNG][eft_FLT] = { - eft_DBL, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_FLT][eft_INT] = { - eft_DBL, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_FLT][eft_LNG] = { - eft_DBL, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_BLN][eft_BLN] = { eft_BLN, (void *)dpnp_initval_default_c}; fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_INT][eft_INT] = { diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index d900d6a0ef3..2576f6d0bb7 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -260,526 +260,9 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap) return; } -#define MACRO_2ARG_3TYPES_OP(__name__, __operation__, __vec_operation__, \ - __vec_types__, __mkl_operation__, __mkl_types__) \ - template \ - class __name__##_kernel; \ - \ - template \ - class __name__##_sg_kernel; \ - \ - template \ - class __name__##_broadcast_kernel; \ - \ - template \ - class __name__##_strides_kernel; \ - \ - template \ - DPCTLSyclEventRef __name__( \ - DPCTLSyclQueueRef q_ref, void *result_out, const size_t result_size, \ - const size_t result_ndim, const shape_elem_type *result_shape, \ - const shape_elem_type *result_strides, const void *input1_in, \ - const size_t input1_size, const size_t input1_ndim, \ - const shape_elem_type *input1_shape, \ - const shape_elem_type *input1_strides, const void *input2_in, \ - const size_t input2_size, const size_t input2_ndim, \ - const shape_elem_type *input2_shape, \ - const shape_elem_type *input2_strides, const size_t *where, \ - const DPCTLEventVectorRef dep_event_vec_ref) \ - { \ - /* avoid warning unused variable*/ \ - (void)where; \ - (void)dep_event_vec_ref; \ - \ - DPCTLSyclEventRef event_ref = nullptr; \ - \ - if (!input1_size || !input2_size) { \ - return event_ref; \ - } \ - \ - sycl::queue q = *(reinterpret_cast(q_ref)); \ - \ - _DataType_input1 *input1_data = \ - static_cast<_DataType_input1 *>(const_cast(input1_in)); \ - _DataType_input2 *input2_data = \ - static_cast<_DataType_input2 *>(const_cast(input2_in)); \ - _DataType_output *result = \ - static_cast<_DataType_output *>(result_out); \ - \ - bool use_broadcasting = !array_equal(input1_shape, input1_ndim, \ - input2_shape, input2_ndim); \ - \ - shape_elem_type *input1_shape_offsets = \ - new shape_elem_type[input1_ndim]; \ - \ - get_shape_offsets_inkernel(input1_shape, input1_ndim, \ - input1_shape_offsets); \ - bool use_strides = !array_equal(input1_strides, input1_ndim, \ - input1_shape_offsets, input1_ndim); \ - delete[] input1_shape_offsets; \ - \ - shape_elem_type *input2_shape_offsets = \ - new shape_elem_type[input2_ndim]; \ - \ - get_shape_offsets_inkernel(input2_shape, input2_ndim, \ - input2_shape_offsets); \ - use_strides = \ - use_strides || !array_equal(input2_strides, input2_ndim, \ - input2_shape_offsets, input2_ndim); \ - delete[] input2_shape_offsets; \ - \ - sycl::event event; \ - sycl::range<1> gws(result_size); \ - \ - if (use_broadcasting) { \ - DPNPC_id<_DataType_input1> *input1_it; \ - const size_t input1_it_size_in_bytes = \ - sizeof(DPNPC_id<_DataType_input1>); \ - input1_it = reinterpret_cast *>( \ - dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes)); \ - new (input1_it) \ - DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, \ - input1_strides, input1_ndim); \ - \ - input1_it->broadcast_to_shape(result_shape, result_ndim); \ - \ - DPNPC_id<_DataType_input2> *input2_it; \ - const size_t input2_it_size_in_bytes = \ - sizeof(DPNPC_id<_DataType_input2>); \ - input2_it = reinterpret_cast *>( \ - dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes)); \ - new (input2_it) \ - DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, \ - input2_strides, input2_ndim); \ - \ - input2_it->broadcast_to_shape(result_shape, result_ndim); \ - \ - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - const size_t i = global_id[0]; /* for (size_t i = 0; i < \ - result_size; ++i) */ \ - { \ - const _DataType_output input1_elem = (*input1_it)[i]; \ - const _DataType_output input2_elem = (*input2_it)[i]; \ - result[i] = __operation__; \ - } \ - }; \ - auto kernel_func = [&](sycl::handler &cgh) { \ - cgh.parallel_for>( \ - gws, kernel_parallel_for_func); \ - }; \ - \ - q.submit(kernel_func).wait(); \ - \ - input1_it->~DPNPC_id(); \ - input2_it->~DPNPC_id(); \ - \ - return event_ref; \ - } \ - else if (use_strides) { \ - if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim)) \ - { \ - throw std::runtime_error( \ - "Result ndim=" + std::to_string(result_ndim) + \ - " mismatches with either input1 ndim=" + \ - std::to_string(input1_ndim) + \ - " or input2 ndim=" + std::to_string(input2_ndim)); \ - } \ - \ - /* memory transfer optimization, use USM-host for temporary speeds \ - * up transfer to device */ \ - using usm_host_allocatorT = \ - sycl::usm_allocator; \ - \ - size_t strides_size = 3 * result_ndim; \ - shape_elem_type *dev_strides_data = \ - sycl::malloc_device(strides_size, q); \ - \ - /* create host temporary for packed strides managed by shared \ - * pointer */ \ - auto strides_host_packed = \ - std::vector( \ - strides_size, usm_host_allocatorT(q)); \ - \ - /* packed vector is concatenation of result_strides, \ - * input1_strides and input2_strides */ \ - std::copy(result_strides, result_strides + result_ndim, \ - strides_host_packed.begin()); \ - std::copy(input1_strides, input1_strides + result_ndim, \ - strides_host_packed.begin() + result_ndim); \ - std::copy(input2_strides, input2_strides + result_ndim, \ - strides_host_packed.begin() + 2 * result_ndim); \ - \ - auto copy_strides_ev = q.copy( \ - strides_host_packed.data(), dev_strides_data, \ - strides_host_packed.size()); \ - \ - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - const size_t output_id = \ - global_id[0]; /* for (size_t i = 0; i < result_size; ++i) \ - */ \ - { \ - const shape_elem_type *result_strides_data = \ - &dev_strides_data[0]; \ - const shape_elem_type *input1_strides_data = \ - &dev_strides_data[result_ndim]; \ - const shape_elem_type *input2_strides_data = \ - &dev_strides_data[2 * result_ndim]; \ - \ - size_t input1_id = 0; \ - size_t input2_id = 0; \ - \ - for (size_t i = 0; i < result_ndim; ++i) { \ - const size_t output_xyz_id = \ - get_xyz_id_by_id_inkernel(output_id, \ - result_strides_data, \ - result_ndim, i); \ - input1_id += output_xyz_id * input1_strides_data[i]; \ - input2_id += output_xyz_id * input2_strides_data[i]; \ - } \ - \ - const _DataType_output input1_elem = \ - input1_data[input1_id]; \ - const _DataType_output input2_elem = \ - input2_data[input2_id]; \ - result[output_id] = __operation__; \ - } \ - }; \ - auto kernel_func = [&](sycl::handler &cgh) { \ - cgh.depends_on(copy_strides_ev); \ - cgh.parallel_for>( \ - gws, kernel_parallel_for_func); \ - }; \ - \ - q.submit(kernel_func).wait(); \ - \ - sycl::free(dev_strides_data, q); \ - return event_ref; \ - } \ - else { \ - if constexpr (both_types_are_same<_DataType_input1, \ - _DataType_input2, \ - __mkl_types__>) \ - { \ - if (q.get_device().has(sycl::aspect::fp64)) { \ - event = __mkl_operation__(q, result_size, input1_data, \ - input2_data, result); \ - \ - event_ref = reinterpret_cast(&event); \ - return DPCTLEvent_Copy(event_ref); \ - } \ - } \ - \ - if constexpr (none_of_both_types< \ - _DataType_input1, _DataType_input2, \ - std::complex, std::complex>) \ - { \ - constexpr size_t lws = 64; \ - constexpr unsigned int vec_sz = 8; \ - \ - auto gws_range = sycl::range<1>( \ - ((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * \ - lws); \ - auto lws_range = sycl::range<1>(lws); \ - \ - auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ - auto sg = nd_it.get_sub_group(); \ - const auto max_sg_size = sg.get_max_local_range()[0]; \ - const size_t start = \ - vec_sz * \ - (nd_it.get_group(0) * nd_it.get_local_range(0) + \ - sg.get_group_id()[0] * max_sg_size); \ - \ - if (is_aligned(input1_data) && \ - is_aligned(input2_data) && \ - is_aligned(result) && \ - (start + static_cast(vec_sz) * max_sg_size < \ - result_size)) \ - { \ - auto input1_multi_ptr = sycl::address_space_cast< \ - sycl::access::address_space::global_space, \ - sycl::access::decorated::yes>( \ - &input1_data[start]); \ - auto input2_multi_ptr = sycl::address_space_cast< \ - sycl::access::address_space::global_space, \ - sycl::access::decorated::yes>( \ - &input2_data[start]); \ - auto result_multi_ptr = sycl::address_space_cast< \ - sycl::access::address_space::global_space, \ - sycl::access::decorated::yes>(&result[start]); \ - \ - sycl::vec<_DataType_output, vec_sz> res_vec; \ - \ - if constexpr (both_types_are_any_of<_DataType_input1, \ - _DataType_input2, \ - __vec_types__>) \ - { \ - if constexpr (both_types_are_same< \ - _DataType_input1, \ - _DataType_input2, \ - _DataType_output>) \ - { \ - sycl::vec<_DataType_input1, vec_sz> x1{}; \ - sycl::vec<_DataType_input2, vec_sz> x2{}; \ - \ - group_load(sg, input1_multi_ptr, x1, striped); \ - group_load(sg, input2_multi_ptr, x2, striped); \ - \ - res_vec = __vec_operation__; \ - } \ - else /* input types don't match result type, so \ - explicit casting is required */ \ - { \ - sycl::vec<_DataType_input1, vec_sz> tmp_x1{}; \ - sycl::vec<_DataType_input2, vec_sz> tmp_x2{}; \ - \ - group_load(sg, input1_multi_ptr, tmp_x1, \ - striped); \ - group_load(sg, input2_multi_ptr, tmp_x2, \ - striped); \ - \ - sycl::vec<_DataType_output, vec_sz> x1 = \ - dpnp_vec_cast<_DataType_output, \ - _DataType_input1, vec_sz>( \ - tmp_x1); \ - sycl::vec<_DataType_output, vec_sz> x2 = \ - dpnp_vec_cast<_DataType_output, \ - _DataType_input2, vec_sz>( \ - tmp_x2); \ - \ - res_vec = __vec_operation__; \ - } \ - } \ - else { \ - sycl::vec<_DataType_input1, vec_sz> x1{}; \ - sycl::vec<_DataType_input2, vec_sz> x2{}; \ - \ - group_load(sg, input1_multi_ptr, x1, striped); \ - group_load(sg, input2_multi_ptr, x2, striped); \ - \ - for (size_t k = 0; k < vec_sz; ++k) { \ - const _DataType_output input1_elem = x1[k]; \ - const _DataType_output input2_elem = x2[k]; \ - res_vec[k] = __operation__; \ - } \ - } \ - group_store(sg, res_vec, result_multi_ptr, striped); \ - } \ - else { \ - for (size_t k = start + sg.get_local_id()[0]; \ - k < result_size; k += max_sg_size) { \ - const _DataType_output input1_elem = \ - input1_data[k]; \ - const _DataType_output input2_elem = \ - input2_data[k]; \ - result[k] = __operation__; \ - } \ - } \ - }; \ - \ - auto kernel_func = [&](sycl::handler &cgh) { \ - cgh.parallel_for>( \ - sycl::nd_range<1>(gws_range, lws_range), \ - kernel_parallel_for_func); \ - }; \ - event = q.submit(kernel_func); \ - } \ - else /* either input1 or input2 has complex type */ { \ - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - const size_t i = global_id[0]; /* for (size_t i = 0; i < \ - result_size; ++i) */ \ - \ - const _DataType_output input1_elem = input1_data[i]; \ - const _DataType_output input2_elem = input2_data[i]; \ - result[i] = __operation__; \ - }; \ - auto kernel_func = [&](sycl::handler &cgh) { \ - cgh.parallel_for>(gws, kernel_parallel_for_func); \ - }; \ - event = q.submit(kernel_func); \ - } \ - } \ - \ - event_ref = reinterpret_cast(&event); \ - return DPCTLEvent_Copy(event_ref); \ - } \ - \ - template \ - void __name__( \ - void *result_out, const size_t result_size, const size_t result_ndim, \ - const shape_elem_type *result_shape, \ - const shape_elem_type *result_strides, const void *input1_in, \ - const size_t input1_size, const size_t input1_ndim, \ - const shape_elem_type *input1_shape, \ - const shape_elem_type *input1_strides, const void *input2_in, \ - const size_t input2_size, const size_t input2_ndim, \ - const shape_elem_type *input2_shape, \ - const shape_elem_type *input2_strides, const size_t *where) \ - { \ - DPCTLSyclQueueRef q_ref = \ - reinterpret_cast(&DPNP_QUEUE); \ - DPCTLEventVectorRef dep_event_vec_ref = nullptr; \ - DPCTLSyclEventRef event_ref = \ - __name__<_DataType_output, _DataType_input1, _DataType_input2>( \ - q_ref, result_out, result_size, result_ndim, result_shape, \ - result_strides, input1_in, input1_size, input1_ndim, \ - input1_shape, input1_strides, input2_in, input2_size, \ - input2_ndim, input2_shape, input2_strides, where, \ - dep_event_vec_ref); \ - DPCTLEvent_WaitAndThrow(event_ref); \ - DPCTLEvent_Delete(event_ref); \ - } \ - \ - template \ - void (*__name__##_default)( \ - void *, const size_t, const size_t, const shape_elem_type *, \ - const shape_elem_type *, const void *, const size_t, const size_t, \ - const shape_elem_type *, const shape_elem_type *, const void *, \ - const size_t, const size_t, const shape_elem_type *, \ - const shape_elem_type *, const size_t *) = \ - __name__<_DataType_output, _DataType_input1, _DataType_input2>; \ - \ - template \ - DPCTLSyclEventRef (*__name__##_ext)( \ - DPCTLSyclQueueRef, void *, const size_t, const size_t, \ - const shape_elem_type *, const shape_elem_type *, const void *, \ - const size_t, const size_t, const shape_elem_type *, \ - const shape_elem_type *, const void *, const size_t, const size_t, \ - const shape_elem_type *, const shape_elem_type *, const size_t *, \ - const DPCTLEventVectorRef) = \ - __name__<_DataType_output, _DataType_input1, _DataType_input2>; - -#include - -static void func_map_init_elemwise_2arg_3type(func_map_t &fmap) -{ - // Used in dpnp_dot_c - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_BLN][eft_BLN] = { - eft_BLN, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_BLN][eft_INT] = { - eft_INT, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_BLN][eft_LNG] = { - eft_LNG, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_BLN][eft_FLT] = { - eft_FLT, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_BLN][eft_DBL] = { - eft_DBL, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_INT][eft_BLN] = { - eft_INT, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_INT][eft_FLT] = { - eft_DBL, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_LNG][eft_BLN] = { - eft_LNG, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_LNG][eft_INT] = { - eft_LNG, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_LNG][eft_FLT] = { - eft_DBL, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_FLT][eft_BLN] = { - eft_FLT, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_FLT][eft_INT] = { - eft_DBL, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_FLT][eft_LNG] = { - eft_DBL, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_DBL][eft_BLN] = { - eft_DBL, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_DBL][eft_INT] = { - eft_DBL, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_DBL][eft_LNG] = { - eft_DBL, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_DBL][eft_FLT] = { - eft_DBL, (void *)dpnp_multiply_c_default}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_multiply_c_default}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C64][eft_BLN] = { - eft_C64, (void *)dpnp_multiply_c_default, - std::complex, bool>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C64][eft_INT] = { - eft_C64, (void *)dpnp_multiply_c_default, - std::complex, int32_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C64][eft_LNG] = { - eft_C64, (void *)dpnp_multiply_c_default, - std::complex, int64_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C64][eft_FLT] = { - eft_C64, (void *)dpnp_multiply_c_default, - std::complex, float>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C64][eft_DBL] = { - eft_C128, (void *)dpnp_multiply_c_default, - std::complex, double>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C64][eft_C64] = { - eft_C64, - (void *)dpnp_multiply_c_default< - std::complex, std::complex, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C64][eft_C128] = { - eft_C128, - (void *)dpnp_multiply_c_default< - std::complex, std::complex, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C128][eft_BLN] = { - eft_C128, (void *)dpnp_multiply_c_default, - std::complex, bool>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C128][eft_INT] = { - eft_C128, - (void *)dpnp_multiply_c_default, - std::complex, int32_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C128][eft_LNG] = { - eft_C128, - (void *)dpnp_multiply_c_default, - std::complex, int64_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C128][eft_FLT] = { - eft_C128, (void *)dpnp_multiply_c_default, - std::complex, float>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C128][eft_DBL] = { - eft_C128, - (void *)dpnp_multiply_c_default, - std::complex, double>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C128][eft_C64] = { - eft_C128, - (void *)dpnp_multiply_c_default< - std::complex, std::complex, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C128][eft_C128] = { - eft_C128, - (void *)dpnp_multiply_c_default< - std::complex, std::complex, std::complex>}; - - return; -} - void func_map_init_elemwise(func_map_t &fmap) { func_map_init_elemwise_1arg_1type(fmap); - func_map_init_elemwise_2arg_3type(fmap); return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp deleted file mode 100644 index 21bf08f2986..00000000000 --- a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp +++ /dev/null @@ -1,200 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2016-2025, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -#include - -#include "dpnp_fptr.hpp" -#include "dpnp_utils.hpp" -#include "dpnpc_memory_adapter.hpp" -#include "queue_sycl.hpp" -#include - -namespace mkl_blas = oneapi::mkl::blas::row_major; -namespace mkl_stats = oneapi::mkl::stats; - -template -class dpnp_correlate_c_kernel; - -template -DPCTLSyclEventRef dpnp_correlate_c(DPCTLSyclQueueRef q_ref, - void *result_out, - const void *input1_in, - const size_t input1_size, - const shape_elem_type *input1_shape, - const size_t input1_shape_ndim, - const void *input2_in, - const size_t input2_size, - const shape_elem_type *input2_shape, - const size_t input2_shape_ndim, - const size_t *where, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - (void)where; - - shape_elem_type dummy[] = {1}; - return dpnp_dot_c<_DataType_output, _DataType_input1, _DataType_input2>( - q_ref, result_out, - 42, // dummy result_size - 42, // dummy result_ndim - NULL, // dummy result_shape - NULL, // dummy result_strides - input1_in, input1_size, input1_shape_ndim, input1_shape, - dummy, // dummy input1_strides - input2_in, input2_size, input2_shape_ndim, input2_shape, - dummy, // dummy input2_strides - dep_event_vec_ref); -} - -template -void dpnp_correlate_c(void *result_out, - const void *input1_in, - const size_t input1_size, - const shape_elem_type *input1_shape, - const size_t input1_shape_ndim, - const void *input2_in, - const size_t input2_size, - const shape_elem_type *input2_shape, - const size_t input2_shape_ndim, - const size_t *where) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = - dpnp_correlate_c<_DataType_output, _DataType_input1, _DataType_input2>( - q_ref, result_out, input1_in, input1_size, input1_shape, - input1_shape_ndim, input2_in, input2_size, input2_shape, - input2_shape_ndim, where, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_correlate_default_c)(void *, - const void *, - const size_t, - const shape_elem_type *, - const size_t, - const void *, - const size_t, - const shape_elem_type *, - const size_t, - const size_t *) = - dpnp_correlate_c<_DataType_output, _DataType_input1, _DataType_input2>; - -template -DPCTLSyclEventRef (*dpnp_correlate_ext_c)(DPCTLSyclQueueRef, - void *, - const void *, - const size_t, - const shape_elem_type *, - const size_t, - const void *, - const size_t, - const shape_elem_type *, - const size_t, - const size_t *, - const DPCTLEventVectorRef) = - dpnp_correlate_c<_DataType_output, _DataType_input1, _DataType_input2>; - -void func_map_init_statistics(func_map_t &fmap) -{ - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_INT][eft_FLT] = { - eft_DBL, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_LNG][eft_INT] = { - eft_LNG, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_LNG][eft_FLT] = { - eft_DBL, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_FLT][eft_INT] = { - eft_DBL, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_FLT][eft_LNG] = { - eft_DBL, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_DBL][eft_INT] = { - eft_DBL, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_DBL][eft_LNG] = { - eft_DBL, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_DBL][eft_FLT] = { - eft_DBL, (void *)dpnp_correlate_default_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_correlate_default_c}; - - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_INT][eft_FLT] = { - eft_DBL, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_LNG][eft_INT] = { - eft_LNG, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_LNG][eft_FLT] = { - eft_DBL, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_FLT][eft_INT] = { - eft_DBL, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_FLT][eft_LNG] = { - eft_DBL, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_correlate_ext_c}; - - return; -} diff --git a/dpnp/backend/src/constants.cpp b/dpnp/backend/src/constants.cpp deleted file mode 100644 index 8658305ae1c..00000000000 --- a/dpnp/backend/src/constants.cpp +++ /dev/null @@ -1,43 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2016-2025, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -#include - -#include "constants.hpp" - -void *python_constants::py_none = nullptr; -void *python_constants::py_nan = nullptr; - -void dpnp_python_constants_initialize_c(void *_py_none, void *_py_nan) -{ - python_constants::py_none = _py_none; - python_constants::py_nan = _py_nan; - // std::cout << "========dpnp_python_constants_initialize_c=============" - // << std::endl; std::cout << "\t None=" << _py_none - // << "\n\t NaN=" << _py_nan - // << "\n\t py_none=" << python_constants::py_none - // << "\n\t py_nan=" << python_constants::py_nan - // << std::endl; -} diff --git a/dpnp/backend/src/constants.hpp b/dpnp/backend/src/constants.hpp deleted file mode 100644 index 9d3b390700b..00000000000 --- a/dpnp/backend/src/constants.hpp +++ /dev/null @@ -1,54 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2016-2025, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -#pragma once -#ifndef CONSTANTS_H // Cython compatibility -#define CONSTANTS_H - -#include "dpnp_iface.hpp" - -/** - * This is container for the constants from Python interpreter and other - * modules. These constants are subject to use in algorithms. - */ -struct python_constants -{ - static void *py_none; /**< Python None */ - static void *py_nan; /**< Python NAN or NumPy.nan */ -}; - -/** - * @ingroup BACKEND_API - * @brief Python constants initialization in the backend. - * - * Global values from Python to use in algorithms. - * - * @param [in] py_none Python NONE representation - * @param [in] py_nan Python NAN representation - */ -INP_DLLEXPORT void dpnp_python_constants_initialize_c(void *py_none, - void *py_nan); - -#endif // CONSTANTS_H diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp index 3ddb5e8768a..2ba85c40c38 100644 --- a/dpnp/backend/src/dpnp_fptr.hpp +++ b/dpnp/backend/src/dpnp_fptr.hpp @@ -156,12 +156,6 @@ static auto dpnp_vec_cast(const sycl::vec &s) s, Indices{}); } -/** - * Removes parentheses for a passed list of types separated by comma. - * It's intended to be used in operations macro. - */ -#define MACRO_UNPACK_TYPES(...) __VA_ARGS__ - /** * Implements std::is_same<> with variadic number of types to compare with * and when type T has to match only one of types Ts. @@ -271,6 +265,5 @@ void func_map_init_linalg(func_map_t &fmap); void func_map_init_mathematical(func_map_t &fmap); void func_map_init_random(func_map_t &fmap); void func_map_init_sorting(func_map_t &fmap); -void func_map_init_statistics(func_map_t &fmap); #endif // BACKEND_FPTR_H diff --git a/dpnp/backend/src/dpnp_iface_fptr.cpp b/dpnp/backend/src/dpnp_iface_fptr.cpp index 0790eb9f65a..0b64df1f95c 100644 --- a/dpnp/backend/src/dpnp_iface_fptr.cpp +++ b/dpnp/backend/src/dpnp_iface_fptr.cpp @@ -76,26 +76,6 @@ DPNPFuncData_t get_dpnp_function_ptr(DPNPFuncName func_name, return func_info; } -template -void (*dpnp_dot_default_c)(void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *, - const void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *, - const void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *) = - dpnp_dot_c<_DataType_output, _DataType_input1, _DataType_input2>; - /** * This operator is needed for compatibility with Cython 0.29 which has a bug in * Enum handling @@ -122,7 +102,6 @@ static func_map_t func_map_init() func_map_init_mathematical(fmap); func_map_init_random(fmap); func_map_init_sorting(fmap); - func_map_init_statistics(fmap); return fmap; }; diff --git a/dpnp/backend/src/dpnp_utils.hpp b/dpnp/backend/src/dpnp_utils.hpp index c13c15ad8f0..9f8da7f97cc 100644 --- a/dpnp/backend/src/dpnp_utils.hpp +++ b/dpnp/backend/src/dpnp_utils.hpp @@ -115,37 +115,6 @@ void get_shape_offsets_inkernel(const _DataType *shape, return; } -/** - * @ingroup BACKEND_UTILS - * @brief Calculation of indices in array - * - * Calculates indices of element in array with given linear position - * for example: - * idx = 5, shape = (2, 3), ndim = 2, - * indices xyz should be [1, 1] - * - * @param [in] idx linear index of the element in multy-D array. - * @param [in] ndim number of dimensions. - * @param [in] shape offsets of array. - * @param [out] xyz indices. - */ -template -void get_xyz_by_id(size_t idx, - size_t ndim, - const _DataType *offsets, - _DataType *xyz) -{ - size_t quotient; - size_t remainder = idx; - - for (size_t i = 0; i < ndim; ++i) { - quotient = remainder / offsets[i]; - remainder = remainder - quotient * offsets[i]; - xyz[i] = quotient; - } - return; -} - /** * @ingroup BACKEND_UTILS * @brief Calculate xyz id for given axis from linear index diff --git a/dpnp/dpnp_algo/CMakeLists.txt b/dpnp/dpnp_algo/CMakeLists.txt index 17163836ea8..df9c1e26ea7 100644 --- a/dpnp/dpnp_algo/CMakeLists.txt +++ b/dpnp/dpnp_algo/CMakeLists.txt @@ -1,6 +1,5 @@ set(dpnp_algo_pyx_deps - ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_statistics.pxi ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_sorting.pxi ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_mathematical.pxi ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_indexing.pxi diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 1e5a3125217..072ba2ae03e 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -34,7 +34,6 @@ from dpnp.dpnp_utils.dpnp_algo_utils cimport dpnp_descriptor cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this namespace for Enum import cdef enum DPNPFuncName "DPNPFuncName": DPNP_FN_CHOOSE_EXT - DPNP_FN_CORRELATE_EXT DPNP_FN_ERF_EXT DPNP_FN_MODF_EXT DPNP_FN_PARTITION_EXT @@ -96,18 +95,6 @@ cdef extern from "dpnp_iface_fptr.hpp": DPNPFuncData get_dpnp_function_ptr(DPNPFuncName name, DPNPFuncType first_type, DPNPFuncType second_type) except + - -cdef extern from "constants.hpp": - void dpnp_python_constants_initialize_c(void * py_none, void * py_nan) - -cdef extern from "dpnp_iface.hpp": - - char * dpnp_memory_alloc_c(size_t size_in_bytes) except + - void dpnp_memory_free_c(void * ptr) - void dpnp_memory_memcpy_c(void * dst, const void * src, size_t size_in_bytes) - void dpnp_rng_srand_c(size_t seed) - - # C function pointer to the C library template functions ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_1in_1out_strides_t)(c_dpctl.DPCTLSyclQueueRef, void *, const size_t, const size_t, @@ -116,18 +103,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_1in_1out_strides_t)(c_dpctl.DPCTLSyclQu const shape_elem_type * , const shape_elem_type * , const long * , const c_dpctl.DPCTLEventVectorRef) -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const void * , - const size_t, - const shape_elem_type * , - const size_t, - const void *, - const size_t, - const shape_elem_type * , - const size_t, - const long * , - const c_dpctl.DPCTLEventVectorRef) """ diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 80ef853396f..718953d3144 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -54,7 +54,6 @@ import operator import numpy __all__ = [ - "dpnp_queue_initialize", ] @@ -62,23 +61,6 @@ include "dpnp_algo_indexing.pxi" include "dpnp_algo_mathematical.pxi" include "dpnp_algo_sorting.pxi" include "dpnp_algo_special.pxi" -include "dpnp_algo_statistics.pxi" - - -cpdef dpnp_queue_initialize(): - """ - Initialize SYCL queue which will be used for any library operations. - It takes visible time and needs to be done in the module loading procedure. - """ - cdef time_t seed_from_time - - dpnp_python_constants_initialize_c(< void*> None, - < void * > dpnp.nan) - - # TODO: - # choose seed number as is in numpy - seed_from_time = time(NULL) - dpnp_rng_srand_c(< size_t > seed_from_time) """ diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pxi b/dpnp/dpnp_algo/dpnp_algo_statistics.pxi deleted file mode 100644 index 57e5d732c12..00000000000 --- a/dpnp/dpnp_algo/dpnp_algo_statistics.pxi +++ /dev/null @@ -1,84 +0,0 @@ -# cython: language_level=3 -# cython: linetrace=True -# -*- coding: utf-8 -*- -# ***************************************************************************** -# Copyright (c) 2016-2025, Intel Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. -# ***************************************************************************** - -"""Module Backend (Statistics part) - -This module contains interface functions between C backend layer -and the rest of the library - -""" - -# NO IMPORTs here. All imports must be placed into main "dpnp_algo.pyx" file - -__all__ += [ - "dpnp_correlate", -] - - -cpdef utils.dpnp_descriptor dpnp_correlate(utils.dpnp_descriptor x1, utils.dpnp_descriptor x2): - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(x2.dtype) - - cdef shape_type_c x1_shape = x1.shape - cdef shape_type_c x2_shape = x2.shape - - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CORRELATE_EXT, param1_type, param2_type) - - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1, x2) - - # create result array with type given by FPTR data - cdef shape_type_c result_shape = (1,) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_2in_1out_t func = kernel_data.ptr - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - result.get_data(), - x1.get_data(), - x1.size, - x1_shape.data(), - x1_shape.size(), - x2.get_data(), - x2.size, - x2_shape.data(), - x2_shape.size(), - NULL, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result