diff --git a/examples/demo_array_svm.py b/examples/demo_array_svm.py
new file mode 100644
index 000000000..3ee013fe2
--- /dev/null
+++ b/examples/demo_array_svm.py
@@ -0,0 +1,66 @@
+"""Demo: add two pyopencl arrays allocated in shared virtual memory."""
+
+import pyopencl as cl
+import pyopencl.array as cl_array
+import numpy as np
+import numpy.linalg as la
+
+a = np.random.rand(500).astype(np.float32)
+b = np.random.rand(500).astype(np.float32)
+
+
+class SVMAllocator:
+    """Callable allocator that hands out SVM-wrapped ``int8`` buffers.
+
+    Instances are passed as the ``allocator`` argument of
+    :func:`pyopencl.array.to_device`.
+    """
+
+    def __init__(self, ctx, flags, alignment):
+        self._context = ctx
+        self._flags = flags
+        self._alignment = alignment
+
+    def __call__(self, nbytes):
+        """Return a :class:`pyopencl.SVM` wrapping *nbytes* ``int8`` entries."""
+        # Allocate from the context given to the constructor, not from
+        # whatever module-level ``ctx`` happens to exist at call time.
+        return cl.SVM(cl.svm_empty(
+            self._context, self._flags, (nbytes,), np.int8, "C",
+            self._alignment))
+
+
+ctx = cl.create_some_context()
+queue = cl.CommandQueue(ctx)
+
+alloc = SVMAllocator(ctx,
+        cl.svm_mem_flags.READ_WRITE | cl.svm_mem_flags.SVM_FINE_GRAIN_BUFFER,
+        0)
+
+a_dev = cl_array.to_device(queue, a, allocator=alloc)
+print("A_DEV", a_dev.data.mem.nbytes, a_dev.data.mem.__array_interface__)
+b_dev = cl_array.to_device(queue, b, allocator=alloc)
+dest_dev = cl_array.empty_like(a_dev)
+print("DEST", dest_dev.data.mem.__array_interface__)
+
+prg = cl.Program(ctx, """
+    __kernel void sum(__global const float *a,
+    __global const float *b, __global float *c)
+    {
+      int gid = get_global_id(0);
+      c[gid] = a[gid] + b[gid];
+    }
+    """).build()
+
+knl = prg.sum  # Use this Kernel object for repeated calls
+knl(queue, a.shape, None, a_dev.data, b_dev.data, dest_dev.data)
+
+# PROBLEM: numpy frees the temporary out of (a_dev+b_dev) before
+# we're done with it
+diff = (dest_dev - (a_dev+b_dev)).get()
+np.set_printoptions(linewidth=400)
+print(dest_dev)
+print((a_dev+b_dev).get())
+print(diff)
+print(la.norm(diff))
+print("A_DEV", a_dev.data.mem.__array_interface__)
diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index ababfbb35..591f90916 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -1771,7 +1771,12 @@ def enqueue_copy(queue, dest, src, **kwargs):
             src = SVM(src)
 
         is_blocking = kwargs.pop("is_blocking", True)
-        return _cl._enqueue_svm_memcpy(queue, is_blocking, dest, src, **kwargs)
+
+        # FIXME: POCL workaround -- enqueue non-blocking, then wait explicitly
+        evt = _cl._enqueue_svm_memcpy(queue, False, dest, src, **kwargs)
+        if is_blocking:
+            evt.wait()
+        return evt
 
     else:
         # assume to-host
@@ -1800,8 +1805,13 @@ def enqueue_copy(queue, dest, src, **kwargs):
             # from svm
             # dest is not a SVM instance, otherwise we'd be in the branch above
             is_blocking = kwargs.pop("is_blocking", True)
-            return _cl._enqueue_svm_memcpy(
-                    queue, is_blocking, SVM(dest), src, **kwargs)
+
+            # FIXME: POCL workaround -- enqueue non-blocking, then wait explicitly
+            evt = _cl._enqueue_svm_memcpy(queue, False, SVM(dest), src, **kwargs)
+            if is_blocking:
+                evt.wait()
+            return evt
+
         else:
             # assume from-host
             raise TypeError("enqueue_copy cannot perform host-to-host transfers")
diff --git a/pyopencl/array.py b/pyopencl/array.py
index 874ae92c4..c4222ca34 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -670,9 +670,16 @@ def set(self, ary, queue=None, async_=None, **kwargs):
                     stacklevel=2)
 
         if self.size:
-            event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
-                    device_offset=self.offset,
-                    is_blocking=not async_)
+            # NOTE(review): device_offset is passed only when nonzero; presumably
+            # the SVM copy path rejects it -- confirm.
+            if self.offset:
+                event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
+                        device_offset=self.offset,
+                        is_blocking=not async_)
+            else:
+                event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
+                        is_blocking=not async_)
+
             self.add_event(event1)
 
     def _get(self, queue=None, ary=None, async_=None, **kwargs):
@@ -720,9 +727,16 @@ def _get(self, queue=None, ary=None, async_=None, **kwargs):
                     "to associate one.")
 
         if self.size:
-            event1 = cl.enqueue_copy(queue, ary, self.base_data,
-                    device_offset=self.offset,
-                    wait_for=self.events, is_blocking=not async_)
+            # NOTE(review): device_offset is passed only when nonzero; presumably
+            # the SVM copy path rejects it -- confirm.
+            if self.offset:
+                event1 = cl.enqueue_copy(queue, ary, self.base_data,
+                        device_offset=self.offset,
+                        wait_for=self.events, is_blocking=not async_)
+            else:
+                event1 = cl.enqueue_copy(queue, ary, self.base_data,
+                        wait_for=self.events, is_blocking=not async_)
+
             self.add_event(event1)
         else:
             event1 = None