From af24ae4ce3b2f88357d53f598bd6f124a638bc8b Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Mon, 3 Feb 2025 09:22:30 -0800 Subject: [PATCH 1/7] fix issue 2270 --- .github/workflows/array-api-skips.txt | 5 +- dpnp/dpnp_utils/dpnp_utils_linearalgebra.py | 149 +++++++++----------- dpnp/tests/test_product.py | 124 ++++++++-------- 3 files changed, 140 insertions(+), 138 deletions(-) diff --git a/.github/workflows/array-api-skips.txt b/.github/workflows/array-api-skips.txt index 097e6ffef42..cccb79a51c5 100644 --- a/.github/workflows/array-api-skips.txt +++ b/.github/workflows/array-api-skips.txt @@ -26,13 +26,10 @@ array_api_tests/test_linalg.py::test_svd array_api_tests/test_linalg.py::test_qr array_api_tests/test_operators_and_elementwise_functions.py::test_clip -# unexpected result is returned +# unexpected result is returned - unmute when dpctl-1986 is resolved array_api_tests/test_operators_and_elementwise_functions.py::test_asin array_api_tests/test_operators_and_elementwise_functions.py::test_asinh # missing 'correction' keyword argument array_api_tests/test_signatures.py::test_func_signature[std] array_api_tests/test_signatures.py::test_func_signature[var] - -# arrays have different values -array_api_tests/test_linalg.py::test_linalg_tensordot diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py index 128dd3b78c3..ca57373a536 100644 --- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py +++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py @@ -52,18 +52,12 @@ def _compute_res_dtype(*arrays, sycl_queue, dtype=None, casting="no"): """ - Determines the output array data type and an intermediate data type - used in performing calculations related to a specific math function. + Determines the output array data type. If dtype is ``None``, the output array data type of the operation is determined based on the Promotion Type Rule and device capabilities. Otherwise, `dtype` is used as output array dtype, if input arrays can cast to it according to the casting rule determined. If casting cannot be done, a ``TypeError`` is raised. - The intermediate data type is the data type used for performing the math - function calculations. If output array dtype is a floating-point data type, - it is also used for the intermediate data type. If output array dtype is an - integral data type, the default floating point data type of the device where - input arrays are allocated on are used for intermediate data type. Parameters ---------- @@ -78,17 +72,13 @@ def _compute_res_dtype(*arrays, sycl_queue, dtype=None, casting="no"): Returns ------- - compute_dtype, res_dtype : - `compute_dtype` is the data type used in performing math function calculations. - The input arrays of the math function are cast to `compute_dtype` and then - the calculations are performed. + res_dtype : `res_dtype` is the output data type. When the result is obtained, it is cast to `res_dtype`. 
""" res_dtype = dpnp.result_type(*arrays) - default_dtype = dpnp.default_float_type(sycl_queue=sycl_queue) if dtype is not None: if dpnp.can_cast(res_dtype, dtype, casting=casting): @@ -98,11 +88,7 @@ def _compute_res_dtype(*arrays, sycl_queue, dtype=None, casting="no"): f"Cannot cast from dtype({res_dtype}) to dtype({dtype}) with casting rule {casting}" ) - compute_dtype = ( - res_dtype if dpnp.issubdtype(res_dtype, dpnp.inexact) else default_dtype - ) - - return compute_dtype, res_dtype + return res_dtype def _copy_array(x, copy_flag=False, dtype=None, order="C"): @@ -749,17 +735,17 @@ def dpnp_dot(a, b, /, out=None, *, casting="same_kind", conjugate=False): _validate_out_array(out, exec_q) # Determine the appropriate data types - dot_dtype, res_dtype = _compute_res_dtype(a, b, sycl_queue=exec_q) + res_dtype = _compute_res_dtype(a, b, sycl_queue=exec_q) result = _create_result_array( - a, b, out, (), dot_dtype, res_usm_type, exec_q + a, b, out, (), res_dtype, res_usm_type, exec_q ) # input arrays should have the proper data type if dpnp.issubdtype(res_dtype, dpnp.inexact): # copying is needed if dtypes of input arrays are different - a = _copy_array(a, dtype=dot_dtype) - b = _copy_array(b, dtype=dot_dtype) + a = _copy_array(a, dtype=res_dtype) + b = _copy_array(b, dtype=res_dtype) _manager = dpu.SequentialOrderManager[exec_q] @@ -777,14 +763,11 @@ def dpnp_dot(a, b, /, out=None, *, casting="same_kind", conjugate=False): ) _manager.add_event_pair(ht_ev, dot_ev) else: - # oneapi::mkl::blas::dot is slow for integer data type, + # oneapi::mkl::blas::dot does not support integer dtypes, # so using dpctl.tensor.vecdot instead - dpt_a = dpnp.get_usm_ndarray(a) - dpt_b = dpnp.get_usm_ndarray(b) - result = dpnp_array._create_from_usm_ndarray(dpt.vecdot(dpt_a, dpt_b)) - - if dot_dtype != res_dtype: - result = result.astype(res_dtype, copy=False) + a_usm = dpnp.get_usm_ndarray(a) + b_usm = dpnp.get_usm_ndarray(b) + result = dpnp_array._create_from_usm_ndarray(dpt.vecdot(a_usm, b_usm)) return dpnp.get_result_array(result, out, casting=casting) @@ -902,7 +885,7 @@ def dpnp_multiplication( axes_res = normalize_axis_tuple(axes_res, len(result_shape), "axes") # Determine the appropriate data types - compute_dtype, res_dtype = _compute_res_dtype( + res_dtype = _compute_res_dtype( x1, x2, dtype=dtype, casting=casting, sycl_queue=exec_q ) @@ -998,7 +981,7 @@ def dpnp_multiplication( x2, out, res_shape, - compute_dtype, + res_dtype, res_usm_type, exec_q, res_order, @@ -1010,64 +993,72 @@ def dpnp_multiplication( elif x1.size == 0 or x2.size == 0: result.fill(0) else: - # input arrays should have the proper data type and - # their base (last 2-dimensions) to be c-contiguous or f-contiguous - x1 = _copy_array( - x1, - copy_flag=not x1_contig_flag, - dtype=compute_dtype, - order=res_order, - ) - x2 = _copy_array( - x2, - copy_flag=not x2_contig_flag, - dtype=compute_dtype, - order=res_order, - ) - - if call_flag == "gemv": - if transpose: - a_usm = dpnp.get_usm_ndarray(x2) - x_usm = dpnp.get_usm_ndarray(x1) - else: - a_usm = dpnp.get_usm_ndarray(x1) - x_usm = dpnp.get_usm_ndarray(x2) - - _manager = dpu.SequentialOrderManager[exec_q] - - ht_ev, gemv_ev = bi._gemv( - exec_q, - a_usm, - x_usm, - dpnp.get_usm_ndarray(result), - transpose, - depends=_manager.submitted_events, - ) - _manager.add_event_pair(ht_ev, gemv_ev) - elif call_flag == "gemm": - result = _gemm_matmul( - exec_q, + if dpnp.issubdtype(res_dtype, dpnp.inexact): + # copying is needed if dtypes of input arrays are different or + # their base (last 
2-dimensions) is not c-contiguous or f-contiguous + x1 = _copy_array( x1, - x2, - result, + copy_flag=not x1_contig_flag, + dtype=res_dtype, + order=res_order, ) - else: # call_flag == "gemm_batch" - assert call_flag == "gemm_batch" - result = _gemm_batch_matmul( - exec_q, - x1, + x2 = _copy_array( x2, - result, + copy_flag=not x2_contig_flag, + dtype=res_dtype, + order=res_order, ) + if call_flag == "gemv": + if transpose: + a_usm = dpnp.get_usm_ndarray(x2) + x_usm = dpnp.get_usm_ndarray(x1) + else: + a_usm = dpnp.get_usm_ndarray(x1) + x_usm = dpnp.get_usm_ndarray(x2) + + _manager = dpu.SequentialOrderManager[exec_q] + + ht_ev, gemv_ev = bi._gemv( + exec_q, + a_usm, + x_usm, + dpnp.get_usm_ndarray(result), + transpose, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(ht_ev, gemv_ev) + elif call_flag == "gemm": + result = _gemm_matmul( + exec_q, + x1, + x2, + result, + ) + else: # call_flag == "gemm_batch" + assert call_flag == "gemm_batch" + result = _gemm_batch_matmul( + exec_q, + x1, + x2, + result, + ) + else: + # oneapi::mkl::blas::gemm/gemv do not support integer dtypes, + # so using dpctl.tensor.matmul instead + x1_usm = dpnp.get_usm_ndarray(x1) + x2_usm = dpnp.get_usm_ndarray(x2) + out_usm = dpnp.get_usm_ndarray(result) + res_usm = dpt.matmul( + x1_usm, x2_usm, out=out_usm, dtype=dtype, order=order + ) + result = dpnp_array._create_from_usm_ndarray(res_usm) + if NumPy_special_case: result = dpnp.tile(result, out.shape) elif res_shape != result_shape: result = dpnp.reshape(result, result_shape) - if compute_dtype != res_dtype: - result = dpnp.astype(result, res_dtype, copy=False) - if out is None: if axes is not None: # Move the data back to the appropriate axes of the result array @@ -1207,7 +1198,7 @@ def dpnp_vecdot( ) # Determine the appropriate data types - _, res_dtype = _compute_res_dtype( + res_dtype = _compute_res_dtype( x1, x2, dtype=dtype, casting=casting, sycl_queue=exec_q ) diff --git a/dpnp/tests/test_product.py b/dpnp/tests/test_product.py index 927c3af94d0..0837ad7e4a8 100644 --- a/dpnp/tests/test_product.py +++ b/dpnp/tests/test_product.py @@ -271,7 +271,8 @@ def test_basic(self, dtype, shape1, shape2): expected = a.dot(b) assert_dtype_allclose(result, expected) - @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + # Use both integer and float, to test both MKL and dpctl backends + @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) @pytest.mark.parametrize("stride", [3, -1, -2, -5]) def test_strided(self, dtype, stride): a = numpy.arange(25, dtype=dtype) @@ -598,6 +599,8 @@ class TestMatmul: def setup_method(self): numpy.random.seed(42) + # Use both integer and float, to test both MKL and dpctl backends + @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) @pytest.mark.parametrize( "order1, order2", [("C", "C"), ("C", "F"), ("F", "C"), ("F", "F")] ) @@ -674,10 +677,7 @@ def setup_method(self): ((7, 4, 3), (0, 7, 3, 5)), ], ) - def test_basic(self, order1, order2, shape1, shape2): - # input should be float type otherwise they are copied to c-contigous array - # so testing order becomes meaningless - dtype = dpnp.default_float_type() + def test_basic(self, dtype, order1, order2, shape1, shape2): a = numpy.arange(numpy.prod(shape1), dtype=dtype).reshape(shape1) b = numpy.arange(numpy.prod(shape2), dtype=dtype).reshape(shape2) a = numpy.array(a, order=order1) @@ -691,15 +691,12 @@ def test_basic(self, order1, order2, shape1, shape2): @pytest.mark.parametrize( "shape1, shape2", [ + ((4,), (4, 3)), ((2, 4), (4, 3)), 
((4, 2, 3), (4, 3, 5)), ((6, 7, 4, 3), (6, 7, 3, 5)), ], - ids=[ - "((2, 4), (4, 3))", - "((4, 2, 3), (4, 3, 5))", - "((6, 7, 4, 3), (6, 7, 3, 5))", - ], + ids=["gemv", "gemm", "gemm_batch1", "gemm_batch2"], ) def test_bool(self, shape1, shape2): x = numpy.arange(2, dtype=numpy.bool_) @@ -829,15 +826,12 @@ def test_axes_out_1D(self, axes, b_shape, out_shape): @pytest.mark.parametrize( "shape1, shape2", [ + ((4,), (4, 3)), ((2, 4), (4, 3)), ((4, 2, 3), (4, 3, 5)), ((6, 7, 4, 3), (6, 7, 3, 5)), ], - ids=[ - "((2, 4), (4, 3))", - "((4, 2, 3), (4, 3, 5))", - "((6, 7, 4, 3), (6, 7, 3, 5))", - ], + ids=["gemv", "gemm", "gemm_batch1", "gemm_batch2"], ) def test_dtype_matrix_inout(self, in_dt, out_dt, shape1, shape2): a = generate_random_numpy_array(shape1, in_dt) @@ -857,15 +851,12 @@ def test_dtype_matrix_inout(self, in_dt, out_dt, shape1, shape2): @pytest.mark.parametrize( "shape1, shape2", [ + ((4,), (4, 3)), ((2, 4), (4, 3)), ((4, 2, 3), (4, 3, 5)), ((6, 7, 4, 3), (6, 7, 3, 5)), ], - ids=[ - "((2, 4), (4, 3))", - "((4, 2, 3), (4, 3, 5))", - "((6, 7, 4, 3), (6, 7, 3, 5))", - ], + ids=["gemv", "gemm", "gemm_batch1", "gemm_batch2"], ) def test_dtype_matrix_inputs(self, dtype1, dtype2, shape1, shape2): a = generate_random_numpy_array(shape1, dtype1) @@ -876,25 +867,25 @@ def test_dtype_matrix_inputs(self, dtype1, dtype2, shape1, shape2): expected = numpy.matmul(a, b) assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) @pytest.mark.parametrize("order1", ["C", "F", "A"]) @pytest.mark.parametrize("order2", ["C", "F", "A"]) @pytest.mark.parametrize("order", ["C", "F", "K", "A"]) @pytest.mark.parametrize( "shape1, shape2", [ + ((4,), (4, 3)), ((2, 4), (4, 3)), ((4, 2, 3), (4, 3, 5)), ((6, 7, 4, 3), (6, 7, 3, 5)), ], - ids=[ - "((2, 4), (4, 3))", - "((4, 2, 3), (4, 3, 5))", - "((6, 7, 4, 3), (6, 7, 3, 5))", - ], + ids=["gemv", "gemm", "gemm_batch1", "gemm_batch2"], ) - def test_order(self, order1, order2, order, shape1, shape2): - a = numpy.arange(numpy.prod(shape1)).reshape(shape1, order=order1) - b = numpy.arange(numpy.prod(shape2)).reshape(shape2, order=order2) + def test_order(self, dtype, order1, order2, order, shape1, shape2): + a = numpy.arange(numpy.prod(shape1), dtype=dtype) + b = numpy.arange(numpy.prod(shape2), dtype=dtype) + a = a.reshape(shape1, order=order1) + b = b.reshape(shape2, order=order2) ia, ib = dpnp.array(a), dpnp.array(b) result = dpnp.matmul(ia, ib, order=order) @@ -913,16 +904,17 @@ def test_order(self, order1, order2, order, shape1, shape2): assert result.flags.f_contiguous == expected.flags.f_contiguous assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) @pytest.mark.parametrize( "stride", [(-2, -2, -2, -2), (2, 2, 2, 2), (-2, 2, -2, 2), (2, -2, 2, -2)], ids=["-2", "2", "(-2, 2)", "(2, -2)"], ) - def test_strided1(self, stride): + def test_strided1(self, dtype, stride): for dim in [1, 2, 3, 4]: shape = tuple(20 for _ in range(dim)) - A = numpy.random.rand(*shape) - iA = dpnp.asarray(A) + A = numpy.random.rand(*shape).astype(dtype) + iA = dpnp.array(A) slices = tuple(slice(None, None, stride[i]) for i in range(dim)) a = A[slices] ia = iA[slices] @@ -938,17 +930,18 @@ def test_strided1(self, stride): assert result is iout assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) @pytest.mark.parametrize( "shape", [(10, 3, 3), (12, 10, 3, 3)], ids=["3D", "4D"] ) - @pytest.mark.parametrize("stride", [-1, -2, 2], ids=["-1", 
"-2", "2"]) - @pytest.mark.parametrize("transpose", [False, True], ids=["False", "True"]) - def test_strided2(self, shape, stride, transpose): + @pytest.mark.parametrize("stride", [-1, -2, 2]) + @pytest.mark.parametrize("transpose", [False, True]) + def test_strided2(self, dtype, shape, stride, transpose): # one dimension (axis=-3) is strided # if negative stride, copy is needed and the base becomes c-contiguous # otherwise the base remains the same as input in gemm_batch - A = numpy.random.rand(*shape) - iA = dpnp.asarray(A) + A = numpy.random.rand(*shape).astype(dtype) + iA = dpnp.array(A) if transpose: A = numpy.moveaxis(A, (-2, -1), (-1, -2)) iA = dpnp.moveaxis(iA, (-2, -1), (-1, -2)) @@ -967,20 +960,21 @@ def test_strided2(self, shape, stride, transpose): assert result is iout assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) @pytest.mark.parametrize( "stride", [(-2, -2), (2, 2), (-2, 2), (2, -2)], ids=["(-2, -2)", "(2, 2)", "(-2, 2)", "(2, -2)"], ) @pytest.mark.parametrize("transpose", [False, True]) - def test_strided3(self, stride, transpose): + def test_strided3(self, dtype, stride, transpose): # 4D case, the 1st and 2nd dimensions are strided # For negative stride, copy is needed and the base becomes c-contiguous. # For positive stride, no copy but reshape makes the base c-contiguous. stride0, stride1 = stride shape = (12, 10, 3, 3) # 4D array - A = numpy.random.rand(*shape) - iA = dpnp.asarray(A) + A = numpy.random.rand(*shape).astype(dtype) + iA = dpnp.array(A) if transpose: A = numpy.moveaxis(A, (-2, -1), (-1, -2)) iA = dpnp.moveaxis(iA, (-2, -1), (-1, -2)) @@ -997,11 +991,12 @@ def test_strided3(self, stride, transpose): assert_dtype_allclose(result, expected) @testing.with_requires("numpy>=2.2") + @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) @pytest.mark.parametrize("func", ["matmul", "matvec"]) @pytest.mark.parametrize("incx", [-2, 2]) @pytest.mark.parametrize("incy", [-2, 2]) @pytest.mark.parametrize("transpose", [False, True]) - def test_strided_mat_vec(self, func, incx, incy, transpose): + def test_strided_mat_vec(self, dtype, func, incx, incy, transpose): # vector is strided shape = (8, 10) # 2D if transpose: @@ -1010,8 +1005,8 @@ def test_strided_mat_vec(self, func, incx, incy, transpose): else: s1 = shape[-1] s2 = shape[-2] - a = numpy.random.rand(*shape) - ia = dpnp.asarray(a) + a = numpy.random.rand(*shape).astype(dtype) + ia = dpnp.array(a) if transpose: a = numpy.moveaxis(a, (-2, -1), (-1, -2)) ia = dpnp.moveaxis(ia, (-2, -1), (-1, -2)) @@ -1032,11 +1027,12 @@ def test_strided_mat_vec(self, func, incx, incy, transpose): assert_dtype_allclose(result, expected) @testing.with_requires("numpy>=2.2") + @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) @pytest.mark.parametrize("func", ["matmul", "vecmat"]) @pytest.mark.parametrize("incx", [-2, 2]) @pytest.mark.parametrize("incy", [-2, 2]) @pytest.mark.parametrize("transpose", [False, True]) - def test_strided_vec_mat(self, func, incx, incy, transpose): + def test_strided_vec_mat(self, dtype, func, incx, incy, transpose): # vector is strided shape = (8, 10) # 2D if transpose: @@ -1045,8 +1041,8 @@ def test_strided_vec_mat(self, func, incx, incy, transpose): else: s1 = shape[-1] s2 = shape[-2] - a = numpy.random.rand(*shape) - ia = dpnp.asarray(a) + a = numpy.random.rand(*shape).astype(dtype) + ia = dpnp.array(a) if transpose: a = numpy.moveaxis(a, (-2, -1), (-1, -2)) ia = dpnp.moveaxis(ia, (-2, -1), (-1, -2)) @@ -1187,7 +1183,7 @@ 
def test_out_0D(self, out_shape): ((5000, 5000, 2, 2), (2, 2)), ], ) - def test_large(self, shape1, shape2): + def test_large_arrays(self, shape1, shape2): a = generate_random_numpy_array(shape1) b = generate_random_numpy_array(shape2) ia, ib = dpnp.array(a), dpnp.array(b) @@ -1196,6 +1192,18 @@ def test_large(self, shape1, shape2): expected = numpy.matmul(a, b) assert_dtype_allclose(result, expected, factor=24) + @pytest.mark.parametrize("dtype", [numpy.int64, numpy.float32]) + def test_large_values(self, dtype): + a = numpy.array( + [[[94906267, 94906267], [94906267, 94906267]]], dtype=dtype + ) + b = numpy.array([94906265, 94906265], dtype=dtype) + ia, ib = dpnp.array(a), dpnp.array(b) + + result = dpnp.matmul(ia, ib) + expected = numpy.matmul(a, b) + assert_dtype_allclose(result, expected) + @testing.with_requires("numpy>=2.0") def test_linalg_matmul(self): a = numpy.ones((3, 4)) @@ -1206,6 +1214,7 @@ def test_linalg_matmul(self): expected = numpy.linalg.matmul(a, b) assert_array_equal(result, expected) + @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) @pytest.mark.parametrize( "sh1, sh2", [ @@ -1214,9 +1223,9 @@ def test_linalg_matmul(self): ], ids=["gemm", "gemm_batch"], ) - def test_matmul_with_offsets(self, sh1, sh2): - a = generate_random_numpy_array(sh1) - b = generate_random_numpy_array(sh2) + def test_matmul_with_offsets(self, dtype, sh1, sh2): + a = generate_random_numpy_array(sh1, dtype) + b = generate_random_numpy_array(sh2, dtype) ia, ib = dpnp.array(a), dpnp.array(b) result = ia[1] @ ib[1] @@ -1978,10 +1987,12 @@ def test_axes_out_1D(self, axes, b_shape): expected = numpy.vecdot(a, b, axes=axes, out=out) assert_dtype_allclose(result, expected) + # Use both integer and float, to test both MKL and dpctl backends + @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) @pytest.mark.parametrize("stride", [2, -1, -2]) - def test_strided(self, stride): - a = numpy.arange(100).reshape(10, 10) - b = numpy.arange(100).reshape(10, 10) + def test_strided(self, dtype, stride): + a = numpy.arange(100, dtype=dtype).reshape(10, 10) + b = numpy.arange(100, dtype=dtype).reshape(10, 10) ia, ib = dpnp.array(a), dpnp.array(b) result = dpnp.vecdot(ia[::stride, ::stride], ib[::stride, ::stride]) @@ -1999,6 +2010,7 @@ def test_input_dtype_matrix(self, dtype1, dtype2): expected = numpy.vecdot(a, b) assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) @pytest.mark.parametrize("order1", ["C", "F", "A"]) @pytest.mark.parametrize("order2", ["C", "F", "A"]) @pytest.mark.parametrize("order", ["C", "F", "K", "A"]) @@ -2007,9 +2019,11 @@ def test_input_dtype_matrix(self, dtype1, dtype2): [(4, 3), (4, 3, 5), (6, 7, 3, 5)], ids=["(4, 3)", "(4, 3, 5)", "(6, 7, 3, 5)"], ) - def test_order(self, order1, order2, order, shape): - a = numpy.arange(numpy.prod(shape)).reshape(shape, order=order1) - b = numpy.arange(numpy.prod(shape)).reshape(shape, order=order2) + def test_order(self, dtype, order1, order2, order, shape): + a = numpy.arange(numpy.prod(shape), dtype=dtype) + b = numpy.arange(numpy.prod(shape), dtype=dtype) + a = a.reshape(shape, order=order1) + b = b.reshape(shape, order=order2) ia, ib = dpnp.array(a), dpnp.array(b) result = dpnp.vecdot(ia, ib, order=order) From 1add60add198c1791e53dfe8ececb16329eb76a5 Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Wed, 5 Feb 2025 14:29:16 -0800 Subject: [PATCH 2/7] increase linux test timeout --- .github/workflows/conda-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-)

diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index 0e83675ba57..3ffe0c8418e 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -218,7 +218,7 @@ jobs:
         id: run_tests_linux
         uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0
         with:
-          timeout_minutes: 12
+          timeout_minutes: 15
           max_attempts: ${{ env.RUN_TESTS_MAX_ATTEMPTS }}
           retry_on: any
           command: |

From 9d2bc0029813059ceddb299e728c87e5ea369f61 Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad
Date: Thu, 6 Feb 2025 08:04:22 -0800
Subject: [PATCH 3/7] address comments

---
 .github/workflows/check-mkl-interfaces.yaml |   2 +-
 .github/workflows/cron-run-tests.yaml       |   2 +-
 dpnp/backend/extensions/blas/blas_py.cpp    |   3 +
 dpnp/dpnp_utils/dpnp_utils_linearalgebra.py |  78 +++++--
 dpnp/tests/test_product.py                  | 241 +++++++++-----------
 5 files changed, 167 insertions(+), 159 deletions(-)

diff --git a/.github/workflows/check-mkl-interfaces.yaml b/.github/workflows/check-mkl-interfaces.yaml
index fe43dfd9235..65bc9d4002f 100644
--- a/.github/workflows/check-mkl-interfaces.yaml
+++ b/.github/workflows/check-mkl-interfaces.yaml
@@ -216,7 +216,7 @@ jobs:
         id: run_tests
         uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0
         with:
-          timeout_minutes: 12
+          timeout_minutes: 15
           max_attempts: ${{ env.RUN_TESTS_MAX_ATTEMPTS }}
           retry_on: any
           command: |
diff --git a/.github/workflows/cron-run-tests.yaml b/.github/workflows/cron-run-tests.yaml
index 1376a25bfa8..942977ac439 100644
--- a/.github/workflows/cron-run-tests.yaml
+++ b/.github/workflows/cron-run-tests.yaml
@@ -126,7 +126,7 @@ jobs:
         id: run_tests_linux
         uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0
         with:
-          timeout_minutes: 12
+          timeout_minutes: 15
           max_attempts: ${{ env.RUN_TESTS_MAX_ATTEMPTS }}
           retry_on: any
           command: |
diff --git a/dpnp/backend/extensions/blas/blas_py.cpp b/dpnp/backend/extensions/blas/blas_py.cpp
index 3b62ad4945e..38d0d5ce9fe 100644
--- a/dpnp/backend/extensions/blas/blas_py.cpp
+++ b/dpnp/backend/extensions/blas/blas_py.cpp
@@ -142,6 +142,9 @@ PYBIND11_MODULE(_blas_impl, m)
         py::arg("sycl_queue"), py::arg("matrixA"), py::arg("vectorX"),
         py::arg("vectorY"), py::arg("transpose"),
         py::arg("depends") = py::list());
+    }
+
+    {
         m.def(
             "_row_major_is_available",
             [](void) {
diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
index ca57373a536..d49b63d4af6 100644
--- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
+++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
@@ -50,20 +50,23 @@
 ]
 
 
-def _compute_res_dtype(*arrays, sycl_queue, dtype=None, casting="no"):
+def _compute_res_dtype(*arrays, sycl_queue, dtype=None, out=None, casting="no"):
     """
     Determines the output array data type.
 
-    If dtype is ``None``, the output array data type of the operation is
-    determined based on the Promotion Type Rule and device capabilities.
-    Otherwise, `dtype` is used as output array dtype, if input arrays
-    can cast to it according to the casting rule determined. If casting
-    cannot be done, a ``TypeError`` is raised.
+    If `dtype` and `out` are ``None``, the output array data type of the
+    operation is determined based on the Promotion Type Rule and device
+    capabilities. If `out` is given, its data type is used as the output
+    array dtype. Otherwise, `dtype` is used as output array dtype.
+    If input arrays cannot be cast to the determined output array dtype,
+    a ``TypeError`` is raised.
Parameters
     ----------
     arrays : {dpnp.ndarray, usm_ndarray}
         Input arrays.
     dtype : dtype
+        If not ``None`` and `out` is not defined, data type of the output array.
+    out : {dpnp.ndarray, usm_ndarray}
         If not ``None``, data type of the output array.
     casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional
         Controls what kind of data casting may occur.
@@ -73,13 +76,23 @@ def _compute_res_dtype(*arrays, sycl_queue, dtype=None, casting="no"):
 
     Returns
     -------
     res_dtype :
-        `res_dtype` is the output data type. When the result is obtained, it is cast
-        to `res_dtype`.
+        `res_dtype` is the output data type. When the result is obtained,
+        it is cast to `res_dtype`.
 
     """
 
     res_dtype = dpnp.result_type(*arrays)
 
+    # If inputs are boolean and `out` is given and it is not boolean, the
+    # calculation should be performed in boolean and at the end the result
+    # is cast to out dtype. It is different from the general case, where the inputs
+    # are cast to out dtype and then calculation is performed. Even when inputs
+    # are boolean and `dtype` is given, the casting is done first and then the
+    # calculation is performed.
+    if out is not None and not dpnp.issubdtype(res_dtype, dpnp.bool):
+        # out dtype is prioritized over a given dtype
+        dtype = out.dtype
+
     if dtype is not None:
         if dpnp.can_cast(res_dtype, dtype, casting=casting):
             res_dtype = dtype
@@ -735,7 +748,9 @@ def dpnp_dot(a, b, /, out=None, *, casting="same_kind", conjugate=False):
     _validate_out_array(out, exec_q)
 
     # Determine the appropriate data types
-    res_dtype = _compute_res_dtype(a, b, sycl_queue=exec_q)
+    res_dtype = _compute_res_dtype(
+        a, b, out=out, casting=casting, sycl_queue=exec_q
+    )
 
     result = _create_result_array(
         a, b, out, (), res_dtype, res_usm_type, exec_q
@@ -886,7 +901,7 @@ def dpnp_multiplication(
 
     # Determine the appropriate data types
     res_dtype = _compute_res_dtype(
-        x1, x2, dtype=dtype, casting=casting, sycl_queue=exec_q
+        x1, x2, dtype=dtype, out=out, casting=casting, sycl_queue=exec_q
     )
 
     call_flag = None
@@ -1008,7 +1030,22 @@ def dpnp_multiplication(
     elif x1.size == 0 or x2.size == 0:
         result.fill(0)
     else:
-        if dpnp.issubdtype(res_dtype, dpnp.inexact):
+        # TODO: replace with dpnp.int8 when it is added
+        x1_is_int8 = dpnp.issubdtype(x1.dtype, numpy.int8)
+        x2_is_int8 = dpnp.issubdtype(x2.dtype, numpy.int8)
+        res_is_int32 = dpnp.issubdtype(res_dtype, dpnp.int32)
+        special_case = x1_is_int8 and x2_is_int8 and res_is_int32
+        special_case = special_case and call_flag == "gemm"
+        if special_case:
+            # OneMath supports this special case
+            x1 = _copy_array(
+                x1, copy_flag=not x1_contig_flag, order=res_order
+            )
+            x2 = _copy_array(
+                x2, copy_flag=not x2_contig_flag, order=res_order
+            )
+            result = _gemm_matmul(exec_q, x1, x2, result)
+        elif dpnp.issubdtype(res_dtype, dpnp.inexact):
             # copying is needed if dtypes of input arrays are different or
             # their base (last 2-dimensions) is not c-contiguous or f-contiguous
             x1 = _copy_array(
@@ -1029,23 +1059,21 @@ def dpnp_multiplication(
                 )
                 _manager.add_event_pair(ht_ev, gemv_ev)
             elif call_flag == "gemm":
-                result = _gemm_matmul(
-                    exec_q,
-                    x1,
-                    x2,
-                    result,
-                )
+                result = _gemm_matmul(exec_q, x1, x2, result)
             else:
                 assert call_flag == "gemm_batch"
-                result = _gemm_batch_matmul(
-                    exec_q,
-                    x1,
-                    x2,
-                    result,
-                )
+                result = _gemm_batch_matmul(exec_q, x1, x2, result)
         else:
             # oneapi::mkl::blas::gemm/gemv do not support integer dtypes,
             # so using dpctl.tensor.matmul instead
+
+            # `dpt.matmul` does not support `casting` kwarg. 
+            # We may need to change input dtypes based on given `casting`.
+            # The possibility of casting is already validated in
+            # `_compute_res_dtype`.
+            x1 = _copy_array(x1, dtype=res_dtype, order=res_order)
+            x2 = _copy_array(x2, dtype=res_dtype, order=res_order)
+
             x1_usm = dpnp.get_usm_ndarray(x1)
             x2_usm = dpnp.get_usm_ndarray(x2)
             out_usm = dpnp.get_usm_ndarray(result)
@@ -1199,7 +1227,7 @@ def dpnp_vecdot(
 
     # Determine the appropriate data types
     res_dtype = _compute_res_dtype(
-        x1, x2, dtype=dtype, casting=casting, sycl_queue=exec_q
+        x1, x2, dtype=dtype, out=out, casting=casting, sycl_queue=exec_q
     )
 
diff --git a/dpnp/tests/test_product.py b/dpnp/tests/test_product.py
index 0837ad7e4a8..c414e6fdf6e 100644
--- a/dpnp/tests/test_product.py
+++ b/dpnp/tests/test_product.py
@@ -3,7 +3,7 @@
 import pytest
 from dpctl.tensor._numpy_helper import AxisError
 from dpctl.utils import ExecutionPlacementError
-from numpy.testing import assert_array_equal, assert_raises
+from numpy.testing import assert_allclose, assert_array_equal, assert_raises
 
 import dpnp
 
@@ -15,6 +15,10 @@
 )
 from .third_party.cupy import testing
 
+# A list of selected dtypes including both integer and float dtypes
+# to test different backends: OneMath (for float) and dpctl (for integer)
+_selected_dtypes = [numpy.int64, numpy.float32]
+
 
 def _assert_selective_dtype_allclose(result, expected, dtype):
     # For numpy.dot, numpy.vdot, numpy.kron, numpy.inner, and numpy.tensordot,
@@ -271,8 +275,7 @@ def test_basic(self, dtype, shape1, shape2):
         expected = a.dot(b)
         assert_dtype_allclose(result, expected)
 
-    # Use both integer and float, to test both MKL and dpctl backends
-    @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32])
+    @pytest.mark.parametrize("dtype", _selected_dtypes)
     @pytest.mark.parametrize("stride", [3, -1, -2, -5])
     def test_strided(self, dtype, stride):
         a = numpy.arange(25, dtype=dtype)
@@ -599,8 +602,7 @@ class TestMatmul:
     def setup_method(self):
         numpy.random.seed(42)
 
-    # Use both integer and float, to test both MKL and dpctl backends
-    @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32])
+    @pytest.mark.parametrize("dtype", _selected_dtypes)
     @pytest.mark.parametrize(
         "order1, order2", [("C", "C"), ("C", "F"), ("F", "C"), ("F", "F")]
     )
@@ -678,8 +680,8 @@ def setup_method(self):
         ],
     )
     def test_basic(self, dtype, order1, order2, shape1, shape2):
-        a = numpy.arange(numpy.prod(shape1), dtype=dtype).reshape(shape1)
-        b = numpy.arange(numpy.prod(shape2), dtype=dtype).reshape(shape2)
+        a = generate_random_numpy_array(shape1, dtype=dtype).reshape(shape1)
+        b = generate_random_numpy_array(shape2, dtype=dtype).reshape(shape2)
         a = numpy.array(a, order=order1)
         b = numpy.array(b, order=order2)
         ia, ib = dpnp.array(a), dpnp.array(b)
@@ -688,26 +690,6 @@ def test_basic(self, dtype, order1, order2, shape1, shape2):
         expected = numpy.matmul(a, b)
         assert_dtype_allclose(result, expected)
 
-    @pytest.mark.parametrize(
-        "shape1, shape2",
-        [
-            ((4,), (4, 3)),
-            ((2, 4), (4, 3)),
-            ((4, 2, 3), (4, 3, 5)),
-            ((6, 7, 4, 3), (6, 7, 3, 5)),
-        ],
-        ids=["gemv", "gemm", "gemm_batch1", "gemm_batch2"],
-    )
-    def test_bool(self, shape1, shape2):
-        x = numpy.arange(2, dtype=numpy.bool_)
-        a = numpy.resize(x, numpy.prod(shape1)).reshape(shape1)
-        b = numpy.resize(x, numpy.prod(shape2)).reshape(shape2)
-        ia, ib = dpnp.array(a), dpnp.array(b)
-
-        result = dpnp.matmul(ia, ib)
-        expected = numpy.matmul(a, b)
-        assert_dtype_allclose(result, expected)
-
     @pytest.mark.parametrize(
         "axes",
         [
@@ -819,55 
+801,48 @@ def test_axes_out_1D(self, axes, b_shape, out_shape):
         expected = numpy.matmul(a, b, axes=axes, out=out)
         assert_dtype_allclose(result, expected)
 
-    @pytest.mark.parametrize("in_dt", get_all_dtypes(no_bool=True))
-    @pytest.mark.parametrize(
-        "out_dt", get_all_dtypes(no_bool=True, no_none=True)
-    )
+    @pytest.mark.parametrize("dt_in1", get_all_dtypes(no_none=True))
+    @pytest.mark.parametrize("dt_in2", get_all_dtypes(no_none=True))
+    @pytest.mark.parametrize("dt_out", get_all_dtypes(no_none=True))
     @pytest.mark.parametrize(
         "shape1, shape2",
         [
-            ((4,), (4, 3)),
+            ((1, 4), (4, 3)),
             ((2, 4), (4, 3)),
             ((4, 2, 3), (4, 3, 5)),
-            ((6, 7, 4, 3), (6, 7, 3, 5)),
         ],
-        ids=["gemv", "gemm", "gemm_batch1", "gemm_batch2"],
+        ids=["gemv", "gemm", "gemm_batch"],
     )
-    def test_dtype_matrix_inout(self, in_dt, out_dt, shape1, shape2):
-        a = generate_random_numpy_array(shape1, in_dt)
-        b = generate_random_numpy_array(shape2, in_dt)
+    def test_dtype_matrix(self, dt_in1, dt_in2, dt_out, shape1, shape2):
+        a = generate_random_numpy_array(shape1, dt_in1)
+        b = generate_random_numpy_array(shape2, dt_in2)
         ia, ib = dpnp.array(a), dpnp.array(b)
 
-        if dpnp.can_cast(dpnp.result_type(ia, ib), out_dt, casting="same_kind"):
-            result = dpnp.matmul(ia, ib, dtype=out_dt)
-            expected = numpy.matmul(a, b, dtype=out_dt)
+        out_shape = shape1[:-2] + (shape1[-2], shape2[-1])
+        out = numpy.empty(out_shape, dtype=dt_out)
+        iout = dpnp.array(out)
+
+        if dpnp.can_cast(dpnp.result_type(ia, ib), dt_out, casting="same_kind"):
+            result = dpnp.matmul(ia, ib, dtype=dt_out)
+            expected = numpy.matmul(a, b, dtype=dt_out)
             assert_dtype_allclose(result, expected)
-        else:
-            assert_raises(TypeError, dpnp.matmul, ia, ib, dtype=out_dt)
-            assert_raises(TypeError, numpy.matmul, a, b, dtype=out_dt)
 
-    @pytest.mark.parametrize("dtype1", get_all_dtypes(no_bool=True))
-    @pytest.mark.parametrize("dtype2", get_all_dtypes(no_bool=True))
-    @pytest.mark.parametrize(
-        "shape1, shape2",
-        [
-            ((4,), (4, 3)),
-            ((2, 4), (4, 3)),
-            ((4, 2, 3), (4, 3, 5)),
-            ((6, 7, 4, 3), (6, 7, 3, 5)),
-        ],
-        ids=["gemv", "gemm", "gemm_batch1", "gemm_batch2"],
-    )
-    def test_dtype_matrix_inputs(self, dtype1, dtype2, shape1, shape2):
-        a = generate_random_numpy_array(shape1, dtype1)
-        b = generate_random_numpy_array(shape2, dtype2)
-        ia, ib = dpnp.array(a), dpnp.array(b)
+            result = dpnp.matmul(ia, ib, out=iout)
+            assert result is iout
+            expected = numpy.matmul(a, b, out=out)
+            # dpnp gives the exact same result when `dtype` or `out` is specified
+            # while NumPy gives slightly different results. 
NumPy result obtained + # with `dtype` is much closer to dpnp (a smaller `tol`) while the + # result obtained with `out` needs a larger `tol` to match dpnp + assert_allclose(result, expected, rtol=1e-6, atol=1e-6) + else: + assert_raises(TypeError, dpnp.matmul, ia, ib, dtype=dt_out) + assert_raises(TypeError, numpy.matmul, a, b, dtype=dt_out) - result = dpnp.matmul(ia, ib) - expected = numpy.matmul(a, b) - assert_dtype_allclose(result, expected) + assert_raises(TypeError, dpnp.matmul, ia, ib, out=iout) + assert_raises(TypeError, numpy.matmul, a, b, out=out) - @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) + @pytest.mark.parametrize("dtype", _selected_dtypes) @pytest.mark.parametrize("order1", ["C", "F", "A"]) @pytest.mark.parametrize("order2", ["C", "F", "A"]) @pytest.mark.parametrize("order", ["C", "F", "K", "A"]) @@ -904,7 +879,7 @@ def test_order(self, dtype, order1, order2, order, shape1, shape2): assert result.flags.f_contiguous == expected.flags.f_contiguous assert_dtype_allclose(result, expected) - @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) + @pytest.mark.parametrize("dtype", _selected_dtypes) @pytest.mark.parametrize( "stride", [(-2, -2, -2, -2), (2, 2, 2, 2), (-2, 2, -2, 2), (2, -2, 2, -2)], @@ -930,7 +905,7 @@ def test_strided1(self, dtype, stride): assert result is iout assert_dtype_allclose(result, expected) - @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) + @pytest.mark.parametrize("dtype", _selected_dtypes) @pytest.mark.parametrize( "shape", [(10, 3, 3), (12, 10, 3, 3)], ids=["3D", "4D"] ) @@ -960,7 +935,7 @@ def test_strided2(self, dtype, shape, stride, transpose): assert result is iout assert_dtype_allclose(result, expected) - @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) + @pytest.mark.parametrize("dtype", _selected_dtypes) @pytest.mark.parametrize( "stride", [(-2, -2), (2, 2), (-2, 2), (2, -2)], @@ -991,7 +966,7 @@ def test_strided3(self, dtype, stride, transpose): assert_dtype_allclose(result, expected) @testing.with_requires("numpy>=2.2") - @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) + @pytest.mark.parametrize("dtype", _selected_dtypes) @pytest.mark.parametrize("func", ["matmul", "matvec"]) @pytest.mark.parametrize("incx", [-2, 2]) @pytest.mark.parametrize("incy", [-2, 2]) @@ -1027,7 +1002,7 @@ def test_strided_mat_vec(self, dtype, func, incx, incy, transpose): assert_dtype_allclose(result, expected) @testing.with_requires("numpy>=2.2") - @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32]) + @pytest.mark.parametrize("dtype", _selected_dtypes) @pytest.mark.parametrize("func", ["matmul", "vecmat"]) @pytest.mark.parametrize("incx", [-2, 2]) @pytest.mark.parametrize("incy", [-2, 2]) @@ -1075,75 +1050,52 @@ def test_strided_vec_mat(self, dtype, func, incx, incy, transpose): ("F", "F", "C"), ], ) - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_none=True, no_bool=True) - ) - def test_out1(self, order1, order2, out_order, dtype): + @pytest.mark.parametrize("dtype", _selected_dtypes) + def test_out_order1(self, order1, order2, out_order, dtype): # test gemm with out keyword - a1 = numpy.arange(20, dtype=dtype).reshape(5, 4, order=order1) - a2 = numpy.arange(28, dtype=dtype).reshape(4, 7, order=order2) - b1, b2 = dpnp.array(a1), dpnp.array(a2) - - iout = dpnp.empty((5, 7), dtype=dtype, order=out_order) - result = dpnp.matmul(b1, b2, out=iout) - assert result is iout - - out = numpy.empty((5, 7), dtype=dtype, order=out_order) - expected = numpy.matmul(a1, a2, out=out) 
- assert result.flags.c_contiguous == expected.flags.c_contiguous - assert result.flags.f_contiguous == expected.flags.f_contiguous - assert_dtype_allclose(result, expected) - - @pytest.mark.parametrize("trans", [True, False]) - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_none=True, no_bool=True) - ) - def test_out2(self, trans, dtype): - # test gemm_batch with out keyword - # the base of input arrays is c-contiguous - # the base of output array is c-contiguous or f-contiguous - a = numpy.arange(24, dtype=dtype).reshape(2, 3, 4) - b = numpy.arange(40, dtype=dtype).reshape(2, 4, 5) + a = generate_random_numpy_array((5, 4), dtype) + b = generate_random_numpy_array((4, 7), dtype) + a = numpy.array(a, order=order1) + b = numpy.array(b, order=order2) ia, ib = dpnp.array(a), dpnp.array(b) - if trans: - iout = dpnp.empty((2, 5, 3), dtype=dtype).transpose(0, 2, 1) - out = numpy.empty((2, 5, 3), dtype=dtype).transpose(0, 2, 1) - else: - iout = dpnp.empty((2, 3, 5), dtype=dtype) - out = numpy.empty((2, 3, 5), dtype=dtype) - + iout = dpnp.empty((5, 7), dtype=dtype, order=out_order) result = dpnp.matmul(ia, ib, out=iout) assert result is iout + out = numpy.empty((5, 7), dtype=dtype, order=out_order) expected = numpy.matmul(a, b, out=out) assert result.flags.c_contiguous == expected.flags.c_contiguous assert result.flags.f_contiguous == expected.flags.f_contiguous assert_dtype_allclose(result, expected) - @pytest.mark.parametrize("trans", [True, False]) - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_none=True, no_bool=True) - ) - def test_out3(self, trans, dtype): + @pytest.mark.parametrize("trans_in", [True, False]) + @pytest.mark.parametrize("trans_out", [True, False]) + @pytest.mark.parametrize("dtype", _selected_dtypes) + def test_out_order2(self, trans_in, trans_out, dtype): # test gemm_batch with out keyword - # the base of input arrays is f-contiguous # the base of output array is c-contiguous or f-contiguous - a = numpy.arange(24, dtype=dtype).reshape(2, 4, 3) - b = numpy.arange(40, dtype=dtype).reshape(2, 5, 4) - ia, ib = dpnp.array(a), dpnp.array(b) - - a = numpy.asarray(a).transpose(0, 2, 1) - b = numpy.asarray(b).transpose(0, 2, 1) - ia = ia.transpose(0, 2, 1) - ib = ib.transpose(0, 2, 1) + if trans_in: + # the base of input arrays is f-contiguous + a = generate_random_numpy_array((2, 4, 3), dtype) + b = generate_random_numpy_array((2, 5, 4), dtype) + ia, ib = dpnp.array(a), dpnp.array(b) + a, b = a.transpose(0, 2, 1), b.transpose(0, 2, 1) + ia, ib = ia.transpose(0, 2, 1), ib.transpose(0, 2, 1) + else: + # the base of input arrays is c-contiguous + a = generate_random_numpy_array((2, 3, 4), dtype) + b = generate_random_numpy_array((2, 4, 5), dtype) + ia, ib = dpnp.array(a), dpnp.array(b) - if trans: + if trans_out: + # the base of output array is f-contiguous iout = dpnp.empty((2, 5, 3), dtype=dtype).transpose(0, 2, 1) out = numpy.empty((2, 5, 3), dtype=dtype).transpose(0, 2, 1) else: - iout = dpnp.empty((2, 3, 5), dtype=dtype) + # the base of output array is c-contiguous out = numpy.empty((2, 3, 5), dtype=dtype) + iout = dpnp.array(out) result = dpnp.matmul(ia, ib, out=iout) assert result is iout @@ -1155,11 +1107,7 @@ def test_out3(self, trans, dtype): @pytest.mark.parametrize( "out_shape", - [ - ((4, 5)), - ((6,)), - ((4, 7, 2)), - ], + [((4, 5)), ((6,)), ((4, 7, 2))], ) def test_out_0D(self, out_shape): # for matmul of 1-D arrays, output is 0-D and if out keyword is given @@ -1192,11 +1140,10 @@ def test_large_arrays(self, shape1, shape2): expected = numpy.matmul(a, 
b)
         assert_dtype_allclose(result, expected, factor=24)
 
-    @pytest.mark.parametrize("dtype", [numpy.int64, numpy.float32])
+    @pytest.mark.parametrize("dtype", _selected_dtypes)
     def test_large_values(self, dtype):
-        a = numpy.array(
-            [[[94906267, 94906267], [94906267, 94906267]]], dtype=dtype
-        )
+        val = 94906267
+        a = numpy.array([[[val, val], [val, val]]], dtype=dtype)
         b = numpy.array([94906265, 94906265], dtype=dtype)
         ia, ib = dpnp.array(a), dpnp.array(b)
 
@@ -1204,6 +1151,37 @@ def test_large_values(self, dtype):
         expected = numpy.matmul(a, b)
         assert_dtype_allclose(result, expected)
 
+    def test_special_case(self):
+        # Although inputs are int, gemm will be used for calculation
+        a = numpy.ones((3, 4), dtype=numpy.int8)
+        b = numpy.ones((4, 5), dtype=numpy.int8)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = dpnp.matmul(ia, ib, dtype=numpy.int32)
+        expected = numpy.matmul(a, b, dtype=numpy.int32)
+        assert_dtype_allclose(result, expected)
+
+        iout = dpnp.empty((3, 5), dtype=numpy.int32)
+        result = dpnp.matmul(ia, ib, out=iout)
+        assert_dtype_allclose(result, expected)
+
+    def test_bool(self):
+        a = numpy.ones((3, 4), dtype=numpy.bool)
+        b = numpy.ones((4, 5), dtype=numpy.bool)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        # the output is (3, 5) array filled with 4
+        result = dpnp.matmul(ia, ib, dtype=numpy.int32)
+        expected = numpy.matmul(a, b, dtype=numpy.int32)
+        assert_dtype_allclose(result, expected)
+
+        out = numpy.empty((3, 5), dtype=numpy.int32)
+        iout = dpnp.array(out)
+        # the output is (3, 5) array filled with 1
+        result = dpnp.matmul(ia, ib, out=iout)
+        expected = numpy.matmul(a, b, out=out)
+        assert_dtype_allclose(result, expected)
+
     @testing.with_requires("numpy>=2.0")
     def test_linalg_matmul(self):
         a = numpy.ones((3, 4))
@@ -1214,7 +1192,7 @@ def test_linalg_matmul(self):
         expected = numpy.linalg.matmul(a, b)
         assert_array_equal(result, expected)
 
-    @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32])
+    @pytest.mark.parametrize("dtype", _selected_dtypes)
     @pytest.mark.parametrize(
         "sh1, sh2",
         [
@@ -1987,8 +1965,7 @@ def test_axes_out_1D(self, axes, b_shape):
         expected = numpy.vecdot(a, b, axes=axes, out=out)
         assert_dtype_allclose(result, expected)
 
-    # Use both integer and float, to test both MKL and dpctl backends
-    @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32])
+    @pytest.mark.parametrize("dtype", _selected_dtypes)
    @pytest.mark.parametrize("stride", [2, -1, -2])
     def test_strided(self, dtype, stride):
         a = numpy.arange(100, dtype=dtype).reshape(10, 10)
@@ -2010,7 +1987,7 @@ def test_input_dtype_matrix(self, dtype1, dtype2):
         expected = numpy.vecdot(a, b)
         assert_dtype_allclose(result, expected)
 
-    @pytest.mark.parametrize("dtype", [numpy.int32, numpy.float32])
+    @pytest.mark.parametrize("dtype", _selected_dtypes)
     @pytest.mark.parametrize("order1", ["C", "F", "A"])
     @pytest.mark.parametrize("order2", ["C", "F", "A"])
     @pytest.mark.parametrize("order", ["C", "F", "K", "A"])

From 161c617cc6b1b74da4abd2c3acfa7c9bd5f77f9b Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad
Date: Thu, 6 Feb 2025 16:57:47 -0800
Subject: [PATCH 4/7] updates for onemkl interfaces

---
 dpnp/backend/extensions/blas/blas_py.cpp    | 14 ++++----
 dpnp/dpnp_utils/dpnp_utils_linearalgebra.py | 37 ++++++++++++++++-----
 dpnp/tests/test_product.py                  | 24 ++++++++-----
 3 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/dpnp/backend/extensions/blas/blas_py.cpp b/dpnp/backend/extensions/blas/blas_py.cpp
index 38d0d5ce9fe..21792912025 100644
--- a/dpnp/backend/extensions/blas/blas_py.cpp
+++ 
b/dpnp/backend/extensions/blas/blas_py.cpp
@@ -146,14 +146,14 @@ PYBIND11_MODULE(_blas_impl, m)
     {
         m.def(
-            "_row_major_is_available",
-            [](void) {
-#if defined(USE_ONEMKL_CUBLAS)
-                return false;
-#else
+            "_using_onemkl_interfaces",
+            []() {
+#ifdef USE_ONEMKL_INTERFACES
                 return true;
-#endif // USE_ONEMKL_CUBLAS
+#else
+                return false;
+#endif
             },
-            "Check if the onemkl::blas::row_major can be used.");
+            "Check if the OneMKL interfaces are being used.");
     }
 }
diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
index d49b63d4af6..30443a67159 100644
--- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
+++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
@@ -503,6 +503,28 @@ def _gemm_matmul(exec_q, x1, x2, res):
     return res
 
 
+def _gemm_special_case(x1, x2, res_dtype, call_flag):
+    """
+    `gemm` and `gemm_batch` support these special cases of data types
+    while `gemv` does not.
+
+    """
+
+    # TODO: replace with dpnp.int8 when it is added
+    x1_is_int8 = dpnp.issubdtype(x1.dtype, numpy.int8)
+    x2_is_int8 = dpnp.issubdtype(x2.dtype, numpy.int8)
+    res_is_int32 = dpnp.issubdtype(res_dtype, dpnp.int32)
+    res_is_float32 = dpnp.issubdtype(res_dtype, dpnp.float32)
+
+    flag = x1_is_int8 and x2_is_int8 and (res_is_int32 or res_is_float32)
+    flag = flag and call_flag in ["gemm", "gemm_batch"]
+
+    # onemkl_interfaces does not support these data types
+    onemkl_interfaces = bi._using_onemkl_interfaces()
+
+    return flag and not onemkl_interfaces
+
+
 def _shape_error(shape1, shape2, func, err_msg):
     """Validate the shapes of input and output arrays."""
 
@@ -1030,21 +1030,18 @@ def dpnp_multiplication(
     elif x1.size == 0 or x2.size == 0:
         result.fill(0)
     else:
-        # TODO: replace with dpnp.int8 when it is added
-        x1_is_int8 = dpnp.issubdtype(x1.dtype, numpy.int8)
-        x2_is_int8 = dpnp.issubdtype(x2.dtype, numpy.int8)
-        res_is_int32 = dpnp.issubdtype(res_dtype, dpnp.int32)
-        special_case = x1_is_int8 and x2_is_int8 and res_is_int32
-        special_case = special_case and call_flag == "gemm"
-        if special_case:
-            # OneMath supports this special case
+        if _gemm_special_case(x1, x2, res_dtype, call_flag):
             x1 = _copy_array(
                 x1, copy_flag=not x1_contig_flag, order=res_order
             )
             x2 = _copy_array(
                 x2, copy_flag=not x2_contig_flag, order=res_order
             )
-            result = _gemm_matmul(exec_q, x1, x2, result)
+            if call_flag == "gemm":
+                result = _gemm_matmul(exec_q, x1, x2, result)
+            else:
+                assert call_flag == "gemm_batch"
+                result = _gemm_batch_matmul(exec_q, x1, x2, result)
         elif dpnp.issubdtype(res_dtype, dpnp.inexact):
             # copying is needed if dtypes of input arrays are different or
             # their base (last 2-dimensions) is not c-contiguous or f-contiguous
             x1 = _copy_array(
diff --git a/dpnp/tests/test_product.py b/dpnp/tests/test_product.py
index c414e6fdf6e..8ebedd141d1 100644
--- a/dpnp/tests/test_product.py
+++ b/dpnp/tests/test_product.py
@@ -834,7 +834,7 @@ def test_dtype_matrix(self, dt_in1, dt_in2, dt_out, shape1, shape2):
         # while NumPy gives slightly different results. 
NumPy result obtained
         # with `dtype` is much closer to dpnp (a smaller `tol`) while the
         # result obtained with `out` needs a larger `tol` to match dpnp
-        assert_allclose(result, expected, rtol=1e-6, atol=1e-6)
+        assert_allclose(result, expected, rtol=1e-5, atol=1e-5)
@@ -1151,23 +1151,29 @@ def test_large_values(self, dtype):
         expected = numpy.matmul(a, b)
         assert_dtype_allclose(result, expected)
 
-    def test_special_case(self):
+    @pytest.mark.parametrize("dt_out", [numpy.int32, numpy.float32])
+    @pytest.mark.parametrize(
+        "shape1, shape2",
+        [((2, 4), (4, 3)), ((4, 2, 3), (4, 3, 5))],
+        ids=["gemm", "gemm_batch"],
+    )
+    def test_special_case(self, dt_out, shape1, shape2):
         # Although inputs are int, gemm will be used for calculation
-        a = numpy.ones((3, 4), dtype=numpy.int8)
-        b = numpy.ones((4, 5), dtype=numpy.int8)
+        a = numpy.ones(shape1, dtype=numpy.int8)
+        b = numpy.ones(shape2, dtype=numpy.int8)
         ia, ib = dpnp.array(a), dpnp.array(b)
 
-        result = dpnp.matmul(ia, ib, dtype=numpy.int32)
-        expected = numpy.matmul(a, b, dtype=numpy.int32)
+        result = dpnp.matmul(ia, ib, dtype=dt_out)
+        expected = numpy.matmul(a, b, dtype=dt_out)
         assert_dtype_allclose(result, expected)
 
-        iout = dpnp.empty((3, 5), dtype=numpy.int32)
+        iout = dpnp.empty(result.shape, dtype=dt_out)
         result = dpnp.matmul(ia, ib, out=iout)
         assert_dtype_allclose(result, expected)
 
     def test_bool(self):
-        a = numpy.ones((3, 4), dtype=numpy.bool)
-        b = numpy.ones((4, 5), dtype=numpy.bool)
+        a = numpy.ones((3, 4), dtype=dpnp.bool)
+        b = numpy.ones((4, 5), dtype=dpnp.bool)
         ia, ib = dpnp.array(a), dpnp.array(b)
 
         # the output is (3, 5) array filled with 4

From 6cc6dee8c0cfe9ef74ca746ce4fd70644df057c5 Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad
Date: Fri, 7 Feb 2025 07:47:01 -0800
Subject: [PATCH 5/7] address new comments

---
 dpnp/dpnp_utils/dpnp_utils_linearalgebra.py | 19 +++++++------------
 dpnp/tests/test_product.py                  | 14 +++++++-------
 2 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
index 30443a67159..5767fdd4e52 100644
--- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
+++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
@@ -89,7 +89,7 @@ def _compute_res_dtype(*arrays, sycl_queue, dtype=None, out=None, casting="no"):
     # are cast to out dtype and then calculation is performed. Even when inputs
     # are boolean and `dtype` is given, the casting is done first and then the
     # calculation is performed.
-    if out is not None and not dpnp.issubdtype(res_dtype, dpnp.bool):
+    if out is not None and res_dtype != dpnp.bool:
         # out dtype is prioritized over a given dtype
         dtype = out.dtype
 
@@ -509,15 +509,10 @@ def _gemm_special_case(x1, x2, res_dtype, call_flag):
     while `gemv` does not. 
""" - # TODO: replace with dpnp.int8 when it is added - x1_is_int8 = dpnp.issubdtype(x1.dtype, numpy.int8) - x2_is_int8 = dpnp.issubdtype(x2.dtype, numpy.int8) - res_is_int32 = dpnp.issubdtype(res_dtype, dpnp.int32) - res_is_float32 = dpnp.issubdtype(res_dtype, dpnp.float32) - - flag = x1_is_int8 and x2_is_int8 and (res_is_int32 or res_is_float32) - flag = flag and call_flag in ["gemm", "gemm_batch"] + is_int8 = x1.dtype == numpy.int8 and x2.dtype == numpy.int8 + is_int32_or_f32 = res_dtype in [dpnp.int32, dpnp.float32] + flag = is_int8 and is_int32_or_f32 and call_flag in ["gemm", "gemm_batch"] # onemkl_interfaces does not support these data types onemkl_interfaces = bi._using_onemkl_interfaces() @@ -1084,7 +1079,8 @@ def dpnp_multiplication( result = _gemm_batch_matmul(exec_q, x1, x2, result) else: # oneapi::mkl::blas::gemm/gemv do not support integer dtypes, - # so using dpctl.tensor.matmul instead + # except for special cases determined in `_gemm_special_case`, + # use dpctl.tensor.matmul for unsupported cases # `dpt.matmul` does not support `casting` kwarg. # We may need to change input dtypes based on given `casting`. @@ -1096,10 +1092,9 @@ def dpnp_multiplication( x1_usm = dpnp.get_usm_ndarray(x1) x2_usm = dpnp.get_usm_ndarray(x2) out_usm = dpnp.get_usm_ndarray(result) - res_usm = dpt.matmul( + dpt.matmul( x1_usm, x2_usm, out=out_usm, dtype=dtype, order=order ) - result = dpnp_array._create_from_usm_ndarray(res_usm) if NumPy_special_case: result = dpnp.tile(result, out.shape) diff --git a/dpnp/tests/test_product.py b/dpnp/tests/test_product.py index 8ebedd141d1..99c211970bf 100644 --- a/dpnp/tests/test_product.py +++ b/dpnp/tests/test_product.py @@ -888,7 +888,7 @@ def test_order(self, dtype, order1, order2, order, shape1, shape2): def test_strided1(self, dtype, stride): for dim in [1, 2, 3, 4]: shape = tuple(20 for _ in range(dim)) - A = numpy.random.rand(*shape).astype(dtype) + A = generate_random_numpy_array(shape, dtype) iA = dpnp.array(A) slices = tuple(slice(None, None, stride[i]) for i in range(dim)) a = A[slices] @@ -897,13 +897,13 @@ def test_strided1(self, dtype, stride): # the 2D base is not c-contiguous nor f-contigous result = dpnp.matmul(ia, ia) expected = numpy.matmul(a, a) - assert_dtype_allclose(result, expected) + assert_dtype_allclose(result, expected, factor=16) iOUT = dpnp.empty(shape, dtype=result.dtype) iout = iOUT[slices] result = dpnp.matmul(ia, ia, out=iout) assert result is iout - assert_dtype_allclose(result, expected) + assert_dtype_allclose(result, expected, factor=16) @pytest.mark.parametrize("dtype", _selected_dtypes) @pytest.mark.parametrize( @@ -915,7 +915,7 @@ def test_strided2(self, dtype, shape, stride, transpose): # one dimension (axis=-3) is strided # if negative stride, copy is needed and the base becomes c-contiguous # otherwise the base remains the same as input in gemm_batch - A = numpy.random.rand(*shape).astype(dtype) + A = generate_random_numpy_array(shape, dtype) iA = dpnp.array(A) if transpose: A = numpy.moveaxis(A, (-2, -1), (-1, -2)) @@ -948,7 +948,7 @@ def test_strided3(self, dtype, stride, transpose): # For positive stride, no copy but reshape makes the base c-contiguous. 
stride0, stride1 = stride
         shape = (12, 10, 3, 3)  # 4D array
-        A = numpy.random.rand(*shape).astype(dtype)
+        A = generate_random_numpy_array(shape, dtype)
         iA = dpnp.array(A)
         if transpose:
             A = numpy.moveaxis(A, (-2, -1), (-1, -2))
             iA = dpnp.moveaxis(iA, (-2, -1), (-1, -2))
@@ -980,7 +980,7 @@ def test_strided_mat_vec(self, dtype, func, incx, incy, transpose):
         else:
             s1 = shape[-1]
             s2 = shape[-2]
-        a = numpy.random.rand(*shape).astype(dtype)
+        a = generate_random_numpy_array(shape, dtype)
         ia = dpnp.array(a)
         if transpose:
             a = numpy.moveaxis(a, (-2, -1), (-1, -2))
@@ -1016,7 +1016,7 @@ def test_strided_vec_mat(self, dtype, func, incx, incy, transpose):
         else:
             s1 = shape[-1]
             s2 = shape[-2]
-        a = numpy.random.rand(*shape).astype(dtype)
+        a = generate_random_numpy_array(shape, dtype)
         ia = dpnp.array(a)
         if transpose:
             a = numpy.moveaxis(a, (-2, -1), (-1, -2))

From 2d41e151d2ebfe06c2f226835043dfb382a5536b Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad <120411540+vtavana@users.noreply.github.com>
Date: Fri, 7 Feb 2025 13:45:15 -0600
Subject: [PATCH 6/7] Apply suggestions from code review

Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com>
---
 dpnp/tests/test_product.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dpnp/tests/test_product.py b/dpnp/tests/test_product.py
index 99c211970bf..ba305540632 100644
--- a/dpnp/tests/test_product.py
+++ b/dpnp/tests/test_product.py
@@ -1159,8 +1159,8 @@ def test_large_values(self, dtype):
     )
     def test_special_case(self, dt_out, shape1, shape2):
         # Although inputs are int, gemm will be used for calculation
-        a = numpy.ones(shape1, dtype=numpy.int8)
-        b = numpy.ones(shape2, dtype=numpy.int8)
+        a = generate_random_numpy_array(shape1, dtype=numpy.int8)
+        b = generate_random_numpy_array(shape2, dtype=numpy.int8)
         ia, ib = dpnp.array(a), dpnp.array(b)
 
         result = dpnp.matmul(ia, ib, dtype=dt_out)
@@ -1172,8 +1172,8 @@ def test_special_case(self, dt_out, shape1, shape2):
         assert_dtype_allclose(result, expected)
 
     def test_bool(self):
-        a = numpy.ones((3, 4), dtype=dpnp.bool)
-        b = numpy.ones((4, 5), dtype=dpnp.bool)
+        a = generate_random_numpy_array((3, 4), dtype=dpnp.bool)
+        b = generate_random_numpy_array((4, 5), dtype=dpnp.bool)
         ia, ib = dpnp.array(a), dpnp.array(b)
 
         # the output is (3, 5) array filled with 4

From 1edddbe5af69b4665c02d8860e0301f3e095498b Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad
Date: Fri, 7 Feb 2025 13:13:33 -0800
Subject: [PATCH 7/7] increase timeout for windows

---
 .github/workflows/conda-package.yml   | 2 +-
 .github/workflows/cron-run-tests.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index 3ffe0c8418e..39f0cf21fa5 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -355,7 +355,7 @@ jobs:
         id: run_tests_win
         uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0
         with:
-          timeout_minutes: 15
+          timeout_minutes: 17
           max_attempts: ${{ env.RUN_TESTS_MAX_ATTEMPTS }}
           retry_on: any
           command: |
diff --git a/.github/workflows/cron-run-tests.yaml b/.github/workflows/cron-run-tests.yaml
index 942977ac439..18ce2ca632e 100644
--- a/.github/workflows/cron-run-tests.yaml
+++ b/.github/workflows/cron-run-tests.yaml
@@ -143,7 +143,7 @@ jobs:
         id: run_tests_win
         uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0
         with:
-          timeout_minutes: 15
+          timeout_minutes: 17
           max_attempts: ${{ env.RUN_TESTS_MAX_ATTEMPTS }}
           retry_on: any
           command: |
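
A minimal sketch (not part of the patch series) of the boolean special case that `_compute_res_dtype` implements above: when both inputs are boolean and a non-boolean `out` is given, the product is computed in boolean and only cast to the `out` dtype at the end, whereas passing `dtype=` casts the inputs before computing. The array values mirror `test_bool`, and the behavior shown assumes a dpnp build with these patches applied.

import dpnp

# Boolean inputs: (3, 4) @ (4, 5) -> (3, 5)
a = dpnp.ones((3, 4), dtype=dpnp.bool)
b = dpnp.ones((4, 5), dtype=dpnp.bool)

# dtype=int32 casts the inputs first; each element is a sum of
# four 1 * 1 products, so the result is filled with 4.
r1 = dpnp.matmul(a, b, dtype=dpnp.int32)

# A non-boolean out= keeps the computation in boolean (True) and
# casts only at the end, so the result is filled with 1.
out = dpnp.empty((3, 5), dtype=dpnp.int32)
r2 = dpnp.matmul(a, b, out=out)

print(int(r1[0, 0]), int(r2[0, 0]))  # expected: 4 1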