From 6df6b49c1301af52e1652bd92e9515405ed06a0f Mon Sep 17 00:00:00 2001 From: Hannan Naeem <63553569+HannanNaeem@users.noreply.github.com> Date: Thu, 8 Feb 2024 17:06:43 -0600 Subject: [PATCH] ufuncs: Added subtract support for 2D views (#254) * ufuncs: Added subtract support for 2D views * ufuncs: fixed type in 2d subtract implementation * tests: added test for subtract 2d * tests: forgot to uncomment rest of the ufunc test cases xD * ufuncs: added basic broadcast - to test yet * ufuncs: docstring and comments cleaned * ufuncs: added scalar -> 2d broadcast * ufuncs: Added scalar subtract impls * tests: removed keyword arguments because of updated subtract protocal (accepts scalars now) * Ufuncs: various small fixes after testing __init__.py: broadcast_view now exposed as util * ufuncs.py: Fixed minor bug in subtract * test_ufuncs.py: Added tests for subtract * ufuncs, removed asserts so pre_compile_ufuncs is not interrupted * ufuncs.py: Same size arrays are not broadcastable and will return False now * ufuncs.py: fixed bug (incorrect if condition when removing assert) * pre_compile_ufuncs: broadcast throws ValueError exception when called incorrectly. ignoring this raise * ufuncs.py: Updated subtract to bypass broadcast guard if views are same dims exactly (broadcasting was updated to be incompatible explicitly with same dims) * Addressed comments about indexing and logic * Removed team policy in favor of for loops * Added broadcast to filter list * pre_compile_ufuncs: fixed wrong logic for filtering broadcast --- pre_compile_tools/pre_compile_ufuncs.py | 2 +- pykokkos/__init__.py | 3 +- pykokkos/lib/ufuncs.py | 264 +++++++++++++++++++++--- tests/test_ufuncs.py | 70 ++++++- 4 files changed, 312 insertions(+), 27 deletions(-) diff --git a/pre_compile_tools/pre_compile_ufuncs.py b/pre_compile_tools/pre_compile_ufuncs.py index b6c6a793..0a64e457 100644 --- a/pre_compile_tools/pre_compile_ufuncs.py +++ b/pre_compile_tools/pre_compile_ufuncs.py @@ -27,7 +27,7 @@ def main(): # level kernels/workunits directly filtered_function_list = [] for f in function_list: - if not "impl" in f[0] and not f[0].startswith("_"): + if not "impl" in f[0] and not f[0].startswith("_") and "broadcast_view" not in f[0]: filtered_function_list.append(f) # TODO: expand types and view dimensions for # ufunc pre-compilation as the support diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index c8b3981e..aba6326b 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -62,7 +62,8 @@ round, trunc, ceil, - floor) + floor, + broadcast_view) from pykokkos.lib.info import iinfo, finfo from pykokkos.lib.create import (zeros, zeros_like, diff --git a/pykokkos/lib/ufuncs.py b/pykokkos/lib/ufuncs.py index 0a7a721e..70640d92 100644 --- a/pykokkos/lib/ufuncs.py +++ b/pykokkos/lib/ufuncs.py @@ -5,6 +5,7 @@ import numpy as np import pykokkos as pk from pykokkos.lib import ufunc_workunits +from pykokkos.interface import ViewType kernel_dict = dict(getmembers(ufunc_workunits, isfunction)) @@ -959,17 +960,150 @@ def multiply(viewA, viewB): return out +def check_broadcastable_impl(viewA, viewB): + """ + Check whether two views are broadcastable as defined here: + https://numpy.org/doc/stable/user/basics.broadcasting.html + + Parameters + ---------- + viewA : pykokkos view + Input view. + viewB : pykokkos view + Input view. + + Returns + ------- + _ : boolean + True if both views are compatible. + """ + + if viewA.shape == viewB.shape: + return False # cannot broadcast same dims + + v1_p = len(viewA.shape) -1 + v2_p = len(viewB.shape) -1 + + while v1_p > -1 and v2_p > -1: + if viewA.shape[v1_p] != viewB.shape[v2_p]: + if viewA.shape[v1_p] != 1 and viewB.shape[v2_p] != 1: + return False + + v1_p -= 1 + v2_p -= 1 + + return True + +@pk.workunit +def stretch_fill_impl_scalar_into_1d(tid, scalar, viewOut): + viewOut[tid] = scalar + +@pk.workunit +def stretch_fill_impl_scalar_into_2d(tid, cols, scalar, viewOut): + for i in range(cols): + viewOut[tid][i] = scalar + +@pk.workunit +def stretch_fill_impl_1d_into_2d(tid, cols, viewIn, viewOut): + for i in range(cols): + viewOut[tid][i] = viewIn[i] + +@pk.workunit +def stretch_fill_impl_2d(tid, inner_its, col_wise, viewIn, viewOut): + for i in range(inner_its): + if col_wise: + viewOut[i][tid] = viewIn[i][0] + else: + viewOut[tid][i] = viewIn[0][i] + + + +def broadcast_view(val, viewB): + """ + Broadcasts val onto viewB, returns the "stretched" version of viewA + + Parameters + ---------- + val : pykokkos view or Scalar + View or scalar to be broadcasted (is shorter and compatible in dimensions). + viewB : pykokkos view + View to be broadcasted onto (is longer and compatible in dimensions). + + Returns + ------- + out : pykokkos view + Broadcasted version of viewA. + + """ + if len(viewB.shape) > 2: + raise NotImplementedError("Broadcasting is only supported upto 2D views") + + is_view = False + if isinstance(val, ViewType): + for dim in val.shape: + if dim != 1: + is_view = True + + if not is_view: + val = val[0] if len(val.shape) == 1 else val[0][0] + + if is_view: + if not check_broadcastable_impl(val, viewB) or not val.shape < viewB.shape: + raise ValueError("Incompatible broadcast") + if not val.dtype == viewB.dtype: + raise ValueError("Broadcastable views must have same dtypes") + + out = pk.View(viewB.shape, viewB.dtype) + + if is_view: + # if both 2D + if len(val.shape) == 2: #viewB must be 2 because of the val.shape < viewB.shape check + # figure which orientation is val (row or col) + col_wise = 1 if val.shape[1] == 1 else 0 + inner_its = viewB.shape[0] if col_wise else viewB.shape[1] + outer_its = viewB.shape[1] if col_wise else viewB.shape[0] + pk.parallel_for(outer_its, stretch_fill_impl_2d, inner_its=inner_its, col_wise=col_wise, viewIn=val, viewOut=out) + else: # 1d to 2D + pk.parallel_for(out.shape[0], stretch_fill_impl_1d_into_2d, cols=viewB.shape[1], viewIn=val, viewOut=out) + + return out + + # scalar + + if len(viewB.shape) == 1: + out_1d = pk.View(viewB.shape) + pk.parallel_for(viewB.shape[0], stretch_fill_impl_scalar_into_1d, scalar=val, viewOut=out_1d) + return out_1d + + # else 2d + pk.parallel_for(out.shape[0], stretch_fill_impl_scalar_into_2d, cols=out.shape[1], scalar=val, viewOut=out) + return out + + @pk.workunit def subtract_impl_1d_double(tid: int, viewA: pk.View1D[pk.double], viewB: pk.View1D[pk.double], out: pk.View1D[pk.double]): out[tid] = viewA[tid] - viewB[tid] - @pk.workunit def subtract_impl_1d_float(tid: int, viewA: pk.View1D[pk.float], viewB: pk.View1D[pk.float], out: pk.View1D[pk.float]): out[tid] = viewA[tid] - viewB[tid] +@pk.workunit +def subtract_impl_2d(tid, cols, viewA, viewB, viewOut): + for i in range(cols): + viewOut[tid][i] = viewA[tid][i] - viewB[tid][i] + +@pk.workunit +def subtract_impl_scalar_1d(tid, viewA, scalar, viewOut): + viewOut[tid] = viewA[tid] - scalar + +@pk.workunit +def subtract_impl_scalar_2d(tid, cols, viewA, scalar, viewOut): + for i in range(cols): + viewOut[tid][i] = viewA[tid][i] - scalar + -def subtract(viewA, viewB): +def subtract(viewA, valB): """ Subtracts positionally corresponding elements of viewA with elements of viewB @@ -978,8 +1112,8 @@ def subtract(viewA, viewB): ---------- viewA : pykokkos view Input view. - viewB : pykokkos view - Input view. + valB : pykokkos view or scalar + Input view or scalar value. Returns ------- @@ -987,27 +1121,109 @@ def subtract(viewA, viewB): Output view. """ - if len(viewA.shape) > 1 or len(viewB.shape) > 1: - raise NotImplementedError("only 1D views currently supported for subtract() ufunc.") - if viewA.dtype.__name__ == "float64" and viewB.dtype.__name__ == "float64": - out = pk.View([viewA.shape[0]], pk.double) - pk.parallel_for( - viewA.shape[0], - subtract_impl_1d_double, - viewA=viewA, - viewB=viewB, - out=out) - elif viewA.dtype.__name__ == "float32" and viewB.dtype.__name__ == "float32": - out = pk.View([viewA.shape[0]], pk.float) - pk.parallel_for( - viewA.shape[0], - subtract_impl_1d_float, - viewA=viewA, - viewB=viewB, - out=out) - else: - raise RuntimeError("Incompatible Types") + is_scalar = True + if isinstance(valB, ViewType): + # if this is a single valued view1D or view2D just count that as a scalar + for dim in valB.shape: + if dim != 1: + is_scalar = False + + if is_scalar: + valB = valB[0] if len(valB.shape) == 1 else valB[0][0] + + if len(viewA.shape) > 2 or (not is_scalar and len(valB.shape) > 2): + raise NotImplementedError("only 1D and 2D views currently supported for subtract() ufunc.") + + if not is_scalar: + + if viewA.shape != valB.shape and not check_broadcastable_impl(viewA, valB): # if shape is not same check compatibility + raise ValueError("Views must be broadcastable") + + # check if size is same otherwise broadcast and fix + if viewA.shape < valB.shape: + viewA = broadcast_view(viewA, valB) + elif valB.shape < viewA.shape: + valB = broadcast_view(valB, viewA) + + if viewA.dtype.__name__ == "float64" and valB.dtype.__name__ == "float64": + + if len(viewA.shape) == 1: + out = pk.View(viewA.shape, pk.double) + pk.parallel_for( + viewA.shape[0], + subtract_impl_1d_double, + viewA=viewA, + viewB=valB, + out=out) + + if len(viewA.shape) == 2: + out = pk.View([viewA.shape[0], viewA.shape[1]], pk.double) + pk.parallel_for( + viewA.shape[0], + subtract_impl_2d, + cols=viewA.shape[1], + viewA=viewA, + viewB=valB, + viewOut=out) + + elif viewA.dtype.__name__ == "float32" and valB.dtype.__name__ == "float32": + + if len(viewA.shape) == 1: + out = pk.View(viewA.shape, pk.float) + pk.parallel_for( + viewA.shape[0], + subtract_impl_1d_float, + viewA=viewA, + viewB=valB, + out=out) + + if len(viewA.shape) == 2: + out = pk.View([viewA.shape[0], viewA.shape[1]], pk.float) + pk.parallel_for( + viewA.shape[0], + subtract_impl_2d, + cols=viewA.shape[1], + viewA=viewA, + viewB=valB, + viewOut=out) + else: + raise RuntimeError("Incompatible Types") + + return out + + + # is scalar subtract ----------------------- + if len(viewA.shape) == 1: # 1D + out = None + if viewA.dtype.__name__ == "float64": + out = pk.View(viewA.shape, pk.double) + if viewA.dtype.__name__ == "float32": + out = pk.View(viewA.shape, pk.float) + + if out is None: raise RuntimeError("Incompatible Types") + + pk.parallel_for(viewA.shape[0], + subtract_impl_scalar_1d, + viewA=viewA, + scalar=valB, + viewOut=out) + + if len(viewA.shape) == 2: # 2D + out = None + if viewA.dtype.__name__ == "float64": + out = pk.View([viewA.shape[0], viewA.shape[1]], pk.double) + if viewA.dtype.__name__ == "float32": + out = pk.View([viewA.shape[0], viewA.shape[1]], pk.float) + + if out is None: raise RuntimeError("Incompatible Types") + pk.parallel_for(viewA.shape[0], + subtract_impl_scalar_2d, + cols=viewA.shape[1], + viewA=viewA, + scalar=valB, + viewOut=out) + return out diff --git a/tests/test_ufuncs.py b/tests/test_ufuncs.py index 2004c3a2..54326cba 100644 --- a/tests/test_ufuncs.py +++ b/tests/test_ufuncs.py @@ -373,7 +373,7 @@ def test_multi_array_1d_exposed_ufuncs_vs_numpy(pk_ufunc, viewB: pk.View1d = pk.View([10], pk_dtype) viewB[:] = np.full(10, 5, dtype=numpy_dtype) - actual = pk_ufunc(viewA=viewA, viewB=viewB) + actual = pk_ufunc(viewA, viewB) assert_allclose(actual, expected) @@ -459,6 +459,74 @@ def test_2d_exposed_ufuncs_vs_numpy(pk_ufunc, assert_allclose(actual, expected) +@pytest.mark.parametrize("pk_ufunc, numpy_ufunc", [ + (pk.subtract, np.subtract), +]) +@pytest.mark.parametrize("numpy_dtype", [ + (np.float64), + (np.float32), +]) +def test_multi_array_2d_exposed_ufuncs_vs_numpy(pk_ufunc, + numpy_ufunc, + numpy_dtype): + N = 4 + M = 7 + rng = default_rng(123) + np1 = rng.random((N, M)).astype(numpy_dtype) + np2 = rng.random((N, M)).astype(numpy_dtype) + expected = numpy_ufunc(np1, np2) + + view1 = pk.array(np1) + view2 = pk.array(np2) + actual = pk_ufunc(view1, view2) + + assert_allclose(actual, expected) + +@pytest.mark.parametrize("pk_ufunc, numpy_ufunc", [ + (pk.subtract, np.subtract), +]) +@pytest.mark.parametrize("numpy_dtype", [ + (np.float64), + (np.float32), +]) +@pytest.mark.parametrize("test_dim", [ + [4,3,1,1], [4,3,1,3], [4,3,4,1], [4,3,1], [4,3,3], [4,3], [4] +]) +def test_broadcast_array_exposed_ufuncs_vs_numpy(pk_ufunc, + numpy_ufunc, + numpy_dtype, + test_dim): + + np1 = None + np2 = None + rng = default_rng(123) + scalar = 3.0 + + if len(test_dim) == 4: + np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) + np2 = rng.random((test_dim[2], test_dim[3])).astype(numpy_dtype) + elif len(test_dim) == 3: + np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) + np2 = rng.random((test_dim[2])).astype(numpy_dtype) + elif len(test_dim) == 2: + np1 = rng.random((test_dim[0], test_dim[1])).astype(numpy_dtype) + np2 = scalar # 2d with scalar + elif len(test_dim) == 1: + np1 = rng.random((test_dim[0])).astype(numpy_dtype) + np2 = scalar # 1d with scalar + else: + raise NotImplementedError("Invalid test conditions: Broadcasting operations are only supported uptil 2D") + + assert np1 is not None and np2 is not None, "Invalid test conditions: Are parameters uptil 2D?" + + expected = numpy_ufunc(np1, np2) + + view1 = pk.array(np1) + view2 = pk.array(np2) if isinstance(np2, np.ndarray) else np2 + actual = pk_ufunc(view1, view2) + + assert_allclose(expected, actual) + @pytest.mark.parametrize("pk_dtype, numpy_dtype", [ (pk.double, np.float64), (pk.float, np.float32),