From ab78e468fb688e42fe41afc71ae7a18f3e68c101 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 Jul 2022 01:09:44 -0500 Subject: [PATCH 01/16] __init__: import warn once at the top --- pyopencl/__init__.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 6420e5afb..016ad4d82 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -21,6 +21,7 @@ """ from sys import intern +from warnings import warn from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa @@ -43,7 +44,6 @@ import os from os.path import dirname, join, realpath if realpath(join(os.getcwd(), "pyopencl")) == realpath(dirname(__file__)): - from warnings import warn warn("It looks like you are importing PyOpenCL from " "its source directory. This likely won't work.") raise @@ -267,7 +267,6 @@ class CommandQueueUsedAfterExit(UserWarning): def compiler_output(text): import os - from warnings import warn if int(os.environ.get("PYOPENCL_COMPILER_OUTPUT", "0")): warn(text, CompilerWarning) else: @@ -389,7 +388,6 @@ def enable_debugging(platform_or_context): import os os.environ["CPU_MAX_COMPUTE_UNITS"] = "1" else: - from warnings import warn warn("do not know how to enable debugging on '%s'" % platform.name) @@ -428,7 +426,6 @@ def _get_prg(self): return self._prg else: # "no program" can only happen in from-source case. - from warnings import warn warn("Pre-build attribute access defeats compiler caching.", stacklevel=3) @@ -662,7 +659,6 @@ def device_hashable_model_and_version_identifier(self): return ("v1", self.vendor, self.vendor_id, self.name, self.version) def device_persistent_unique_id(self): - from warnings import warn warn("Device.persistent_unique_id is deprecated. " "Use Device.hashable_model_and_version_identifier instead.", DeprecationWarning, stacklevel=2) @@ -684,7 +680,6 @@ def device_persistent_unique_id(self): def context_init(self, devices, properties, dev_type, cache_dir=None): if cache_dir is not None: - from warnings import warn warn("The 'cache_dir' argument to the Context constructor " "is deprecated and no longer has an effect. " "It was removed because it only applied to the wrapper " @@ -970,7 +965,6 @@ def image_init(self, context, flags, format, shape=None, pitches=None, if hostbuf is not None and not \ (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)): - from warnings import warn warn("'hostbuf' was passed, but no memory flags to make use of it.") if hostbuf is None and pitches is not None: @@ -1043,7 +1037,6 @@ def image_init(self, context, flags, format, shape=None, pitches=None, class _ImageInfoGetter: def __init__(self, event): - from warnings import warn warn("Image.image.attr is deprecated and will go away in 2021. " "Use Image.attr directly, instead.") @@ -1927,7 +1920,6 @@ def enqueue_barrier(queue, wait_for=None): def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None): if not (queue._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2)): - from warnings import warn warn("The context for this queue does not declare OpenCL 1.2 support, so " "the next thing you might see is a crash") From 30e0085adaf1246b1a40f3a1707b80a1d1216b81 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 Jul 2022 01:14:58 -0500 Subject: [PATCH 02/16] Add is_queue_in_order --- src/wrap_cl.hpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 98964056f..8de8cd0df 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -496,6 +496,19 @@ namespace pyopencl // }}} + // {{{ utility functions + + inline bool is_queue_in_order(cl_command_queue queue) + { + cl_command_queue_properties param_value; + PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, + (queue, CL_QUEUE_PROPERTIES, sizeof(param_value), ¶m_value, 0)); + return !(param_value & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); + } + + // }}} + + // {{{ buffer interface helper From bfcfcc9fae09529bdc3ff2b119572c8d778b4cf1 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 Jul 2022 01:15:32 -0500 Subject: [PATCH 03/16] Add command_queue_ref --- src/wrap_cl.hpp | 70 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 8de8cd0df..8147cb183 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -1668,6 +1668,76 @@ namespace pyopencl // }}} + // {{{ command_queue_ref + + // In contrast to command_queue, command_queue_ref is "nullable", i.e. + // it is a RAII *optional* reference to a command queue. + + class command_queue_ref + { + private: + bool m_valid; + cl_command_queue m_queue; + + public: + command_queue_ref() + : m_valid(false) + {} + + command_queue_ref(cl_command_queue queue) + : m_valid(true), m_queue(queue) + { + PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue)); + } + + command_queue_ref(command_queue_ref &&src) + : m_valid(src.m_valid), m_queue(src.m_queue) + { + src.m_valid = false; + } + + command_queue_ref(const command_queue_ref &) = delete; + command_queue_ref &operator=(const command_queue_ref &) = delete; + + ~command_queue_ref() + { + reset(); + } + + bool is_valid() const + { + return m_valid; + } + + cl_command_queue data() const + { + if (m_valid) + return m_queue; + else + throw error("command_queue_ref.data", CL_INVALID_VALUE, + "command_queue_ref is not valid"); + } + + void reset() + { + if (m_valid) + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseCommandQueue, (m_queue)); + m_valid = false; + } + + void set(cl_command_queue queue) + { + if (m_valid) + PYOPENCL_CALL_GUARDED(clReleaseCommandQueue, (m_queue)); + m_queue = queue; + PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue)); + m_valid = true; + } + }; + + // }}} + + // {{{ event/synchronization class event : noncopyable From defe1850dd4a1358484998201cc47da666c23e61 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 30 Jun 2022 23:18:16 -0500 Subject: [PATCH 04/16] Add, use enqueue_fill --- doc/runtime_memory.rst | 2 ++ pyopencl/__init__.py | 21 +++++++++++++++++++++ pyopencl/array.py | 4 ++-- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/doc/runtime_memory.rst b/doc/runtime_memory.rst index f4e01f266..ffe6661e8 100644 --- a/doc/runtime_memory.rst +++ b/doc/runtime_memory.rst @@ -281,6 +281,8 @@ Transfers .. autofunction:: enqueue_copy(queue, dest, src, **kwargs) +.. autofunction:: enqueue_fill(queue, dest, src, **kwargs) + Mapping Memory into Host Address Space -------------------------------------- diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 016ad4d82..86c77f05b 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -22,6 +22,7 @@ from sys import intern from warnings import warn +from typing import Union, Any from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa @@ -1815,6 +1816,26 @@ def enqueue_copy(queue, dest, src, **kwargs): # }}} +# {{{ enqueue_fill + +def enqueue_fill(queue: CommandQueue, dest: Union[MemoryObjectHolder, SVM], + pattern: Any, size: int, *, offset: int = 0, wait_for=None) -> Event: + """ + .. versionadded:: 2022.2 + """ + if isinstance(dest, MemoryObjectHolder): + return enqueue_fill_buffer(queue, dest, pattern, offset, size, wait_for) + elif isinstance(dest, SVM): + if offset: + raise NotImplementedError("enqueue_fill with SVM does not yet support " + "offsets") + return enqueue_svm_memfill(queue, dest, pattern, size, wait_for) + else: + raise TypeError(f"enqueue_fill does not know how to fill '{type(dest)}'") + +# }}} + + # {{{ image creation DTYPE_TO_CHANNEL_TYPE = { diff --git a/pyopencl/array.py b/pyopencl/array.py index 4e8b52a13..fe6162645 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -1458,8 +1458,8 @@ def _zero_fill(self, queue=None, wait_for=None): # https://github.com/inducer/pyopencl/issues/395 if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31): self.add_event( - cl.enqueue_fill_buffer(queue, self.base_data, np.int8(0), - self.offset, self.nbytes, wait_for=wait_for)) + cl.enqueue_fill(queue, self.base_data, np.int8(0), + self.nbytes, offset=self.offset, wait_for=wait_for)) else: zero = np.zeros((), self.dtype) self.fill(zero, queue=queue) From 8c15afc5d431a6c50a6043fe180ea1fd307955dd Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 29 Mar 2021 00:21:04 -0500 Subject: [PATCH 05/16] Experiment with SVM backing arrays --- examples/demo_array_svm.py | 57 ++++++++++++++++++++++++++++++++++++++ pyopencl/array.py | 22 +++++++++++---- 2 files changed, 73 insertions(+), 6 deletions(-) create mode 100644 examples/demo_array_svm.py diff --git a/examples/demo_array_svm.py b/examples/demo_array_svm.py new file mode 100644 index 000000000..07454f1fe --- /dev/null +++ b/examples/demo_array_svm.py @@ -0,0 +1,57 @@ +import pyopencl as cl +import pyopencl.array as cl_array +import numpy as np +import numpy.linalg as la + +n = 5000000 +a = np.random.rand(n).astype(np.float32) +b = np.random.rand(n).astype(np.float32) + + +class SVMAllocator: + def __init__(self, ctx, flags, alignment): + self._context = ctx + self._flags = flags + self._alignment = alignment + + def __call__(self, nbytes): + return cl.SVM(cl.svm_empty( + ctx, self._flags, (nbytes,), np.int8, "C", self._alignment)) + + +ctx = cl.create_some_context() +queue = cl.CommandQueue(ctx) + +alloc = SVMAllocator(ctx, + cl.svm_mem_flags.READ_WRITE | cl.svm_mem_flags.SVM_FINE_GRAIN_BUFFER, + 0) + +a_dev = cl_array.to_device(queue, a, allocator=alloc) +print("A_DEV", a_dev.data.mem.nbytes, a_dev.data.mem.__array_interface__) +b_dev = cl_array.to_device(queue, b, allocator=alloc) +dest_dev = cl_array.empty_like(a_dev) +print("DEST", dest_dev.data.mem.__array_interface__) + +prg = cl.Program(ctx, """ + __kernel void sum(__global const float *a, + __global const float *b, __global float *c) + { + int gid = get_global_id(0); + c[gid] = a[gid] + b[gid]; + } + """).build() + +knl = prg.sum # Use this Kernel object for repeated calls +knl(queue, a.shape, None, a_dev.data, b_dev.data, dest_dev.data) + +# PROBLEM: numpy frees the temporary out of (a_dev+b_dev) before +# we're done with it +diff = dest_dev - (a_dev+b_dev) +if 0: + diff = diff.get() + np.set_printoptions(linewidth=400) + print(dest_dev) + print((a_dev+b_dev).get()) + print(diff) + print(la.norm(diff)) + print("A_DEV", a_dev.data.mem.__array_interface__) diff --git a/pyopencl/array.py b/pyopencl/array.py index fe6162645..c191d0ed4 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -720,9 +720,14 @@ def set(self, ary, queue=None, async_=None, **kwargs): stacklevel=2) if self.size: - event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary, - device_offset=self.offset, - is_blocking=not async_) + if self.offset: + event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary, + device_offset=self.offset, + is_blocking=not async_) + else: + event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary, + is_blocking=not async_) + self.add_event(event1) def _get(self, queue=None, ary=None, async_=None, **kwargs): @@ -770,9 +775,14 @@ def _get(self, queue=None, ary=None, async_=None, **kwargs): "to associate one.") if self.size: - event1 = cl.enqueue_copy(queue, ary, self.base_data, - device_offset=self.offset, - wait_for=self.events, is_blocking=not async_) + if self.offset: + event1 = cl.enqueue_copy(queue, ary, self.base_data, + device_offset=self.offset, + wait_for=self.events, is_blocking=not async_) + else: + event1 = cl.enqueue_copy(queue, ary, self.base_data, + wait_for=self.events, is_blocking=not async_) + self.add_event(event1) else: event1 = None From a286cc37d642f20e294a17a2fbdfdd26465ee5b0 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 27 Jun 2022 01:12:54 -0500 Subject: [PATCH 06/16] Allow tying an svm_allocation to a queue --- pyopencl/__init__.py | 14 +++++++++---- src/wrap_cl.hpp | 45 ++++++++++++++++++++++++++++++++++++++---- src/wrap_cl_part_2.cpp | 10 +++++++++- 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 86c77f05b..5b8d8b72e 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -1146,12 +1146,13 @@ def memory_map_exit(self, exc_type, exc_val, exc_tb): if get_cl_header_version() >= (2, 0): svmallocation_old_init = SVMAllocation.__init__ - def svmallocation_init(self, ctx, size, alignment, flags, _interface=None): + def svmallocation_init(self, ctx, size, alignment, flags, _interface=None, + queue=None): """ :arg ctx: a :class:`Context` :arg flags: some of :class:`svm_mem_flags`. """ - svmallocation_old_init(self, ctx, size, alignment, flags) + svmallocation_old_init(self, ctx, size, alignment, flags, queue) # mem_flags.READ_ONLY applies to kernels, not the host read_write = True @@ -1996,7 +1997,7 @@ def enqueue_svm_migratemem(queue, svms, flags, wait_for=None): wait_for) -def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None): +def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None, queue=None): """Allocate an empty :class:`numpy.ndarray` of the given *shape*, *dtype* and *order*. (See :func:`numpy.empty` for the meaning of these arguments.) The array will be allocated in shared virtual memory belonging @@ -2014,6 +2015,10 @@ def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None): will likely want to wrap the returned array in an :class:`SVM` tag. .. versionadded:: 2016.2 + + .. versionchanged:: 2022.2 + + *queue* argument added. """ dtype = np.dtype(dtype) @@ -2060,7 +2065,8 @@ def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None): if alignment is None: alignment = itemsize - svm_alloc = SVMAllocation(ctx, nbytes, alignment, flags, _interface=interface) + svm_alloc = SVMAllocation(ctx, nbytes, alignment, flags, _interface=interface, + queue=queue) return np.asarray(svm_alloc) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 8147cb183..2582a5103 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -3562,16 +3562,28 @@ namespace pyopencl }; - class svm_allocation : noncopyable + class svm_allocation { private: std::shared_ptr m_context; void *m_allocation; + command_queue_ref m_queue; + // FIXME Should keep a list of events so that we can wait for users + // to finish in the case of out-of-order queues. public: - svm_allocation(std::shared_ptr const &ctx, size_t size, cl_uint alignment, cl_svm_mem_flags flags) + svm_allocation(std::shared_ptr const &ctx, size_t size, cl_uint alignment, + cl_svm_mem_flags flags, const command_queue *queue = nullptr) : m_context(ctx) { + if (queue) + { + m_queue.set(queue->data()); + if (!is_queue_in_order(m_queue.data())) + throw error("SVMAllocation.__init__", CL_INVALID_VALUE, + "supplying an out-of-order queue to SVMAllocation is invalid"); + } + PYOPENCL_PRINT_CALL_TRACE("clSVMalloc"); m_allocation = clSVMAlloc( ctx->data(), @@ -3581,6 +3593,9 @@ namespace pyopencl throw pyopencl::error("clSVMAlloc", CL_OUT_OF_RESOURCES); } + svm_allocation(const svm_allocation &) = delete; + svm_allocation &operator=(const svm_allocation &) = delete; + ~svm_allocation() { if (m_allocation) @@ -3593,8 +3608,20 @@ namespace pyopencl throw error("SVMAllocation.release", CL_INVALID_VALUE, "trying to double-unref svm allocation"); - clSVMFree(m_context->data(), m_allocation); - m_allocation = nullptr; + if (m_queue.is_valid()) + { + PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueSVMFree, ( + m_queue.data(), 1, &m_allocation, + nullptr, nullptr, + 0, nullptr, nullptr)); + m_queue.reset(); + } + else + { + PYOPENCL_PRINT_CALL_TRACE("clSVMFree"); + clSVMFree(m_context->data(), m_allocation); + m_allocation = nullptr; + } } void enqueue_release(command_queue &queue, py::object py_wait_for) @@ -3634,6 +3661,16 @@ namespace pyopencl { return m_allocation != other.m_allocation; } + + void bind_to_queue(command_queue const &queue) + { + m_queue.set(queue.data()); + } + + void unbind_from_queue() + { + m_queue.reset(); + } }; diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index 0c9a0d1b1..ace5e9777 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -305,7 +305,13 @@ void pyopencl_expose_part_2(py::module &m) { typedef svm_allocation cls; py::class_(m, "SVMAllocation", py::dynamic_attr()) - .def(py::init, size_t, cl_uint, cl_svm_mem_flags>()) + .def(py::init, size_t, cl_uint, cl_svm_mem_flags, const command_queue *>(), + py::arg("context"), + py::arg("size"), + py::arg("alignment"), + py::arg("flags"), + py::arg("queue").none(true)=py::none() + ) .DEF_SIMPLE_METHOD(release) .def("enqueue_release", &cls::enqueue_release, ":returns: a :class:`pyopencl.Event`\n\n" @@ -314,6 +320,8 @@ void pyopencl_expose_part_2(py::module &m) .def(py::self == py::self) .def(py::self != py::self) .def("__hash__", &cls::ptr_as_int) + .DEF_SIMPLE_METHOD(bind_to_queue) + .DEF_SIMPLE_METHOD(unbind_from_queue) ; } From d893cd737ece8af456f25a573ed062f77876d5f5 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 27 Jun 2022 01:13:52 -0500 Subject: [PATCH 07/16] Add fold markers in wrap_mempool --- src/wrap_mempool.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp index 6b014ba5e..2738ccca2 100644 --- a/src/wrap_mempool.cpp +++ b/src/wrap_mempool.cpp @@ -70,6 +70,8 @@ namespace }; + // {{{ cl allocators + class cl_allocator_base { protected: @@ -210,8 +212,10 @@ namespace } }; + // }}} + // {{{ allocator_call inline pyopencl::buffer *allocator_call(cl_allocator_base &alloc, size_t size) @@ -256,8 +260,10 @@ namespace } } + // }}} + // {{{ pooled_buffer class pooled_buffer : public pyopencl::pooled_allocation >, @@ -278,8 +284,10 @@ namespace { return ptr(); } }; + // }}} + // {{{{ device_pool_allocate pooled_buffer *device_pool_allocate( std::shared_ptr > pool, @@ -288,6 +296,9 @@ namespace return new pooled_buffer(pool, sz); } + // }}} + + @@ -398,3 +409,5 @@ void pyopencl_expose_mempool(py::module &m) ; } } + +// vim: foldmethod=marker From 7f9e0822ea8e092fd831909ec36ea15e15ce38bc Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 27 Jun 2022 01:14:15 -0500 Subject: [PATCH 08/16] Draft svm_allocator for mempool --- src/wrap_mempool.cpp | 54 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp index 2738ccca2..35630b036 100644 --- a/src/wrap_mempool.cpp +++ b/src/wrap_mempool.cpp @@ -299,6 +299,60 @@ namespace // }}} + // {{{ svm allocator + + // FIXME: Does this need deferred and immediate just like the buffer-level + // allocators? (I.e. can I tell whether I am out of memory just from allocations?) + class svm_allocator + { + protected: + std::shared_ptr m_context; + cl_uint m_alignment; + cl_mem_flags m_flags; + + public: + svm_allocator(std::shared_ptr const &ctx, + cl_uint alignment, cl_mem_flags flags=CL_MEM_READ_WRITE) + : m_context(ctx), m_alignment(alignment), m_flags(flags) + { + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) + throw pyopencl::error("Allocator", CL_INVALID_VALUE, + "cannot specify USE_HOST_PTR or COPY_HOST_PTR flags"); + } + + svm_allocator(svm_allocator const &src) + : m_context(src.m_context), m_alignment(src.m_alignment), + m_flags(src.m_flags) + { } + + virtual ~svm_allocator() + { } + + typedef void *pointer_type; + typedef size_t size_type; + + pointer_type allocate(size_type size) + { + if (size == 0) + return nullptr; + + PYOPENCL_PRINT_CALL_TRACE("clSVMalloc"); + return clSVMAlloc(m_context->data(), m_flags, size, m_alignment); + } + + void free(pointer_type p) + { + clSVMFree(m_context->data(), p); + } + + void try_release_blocks() + { + pyopencl::run_python_gc(); + } + }; + + // }}} + From 823e893faad822dc3eb842f31ac2b4d0597dae17 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 Jul 2022 01:11:50 -0500 Subject: [PATCH 09/16] enqueue_copy for SVM: Accept no-op {src,dest}_offset, byte_count parameters --- pyopencl/__init__.py | 3 +++ src/wrap_cl_part_2.cpp | 1 + 2 files changed, 4 insertions(+) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 5b8d8b72e..12ae3be04 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -1779,6 +1779,9 @@ def enqueue_copy(queue, dest, src, **kwargs): src = SVM(src) is_blocking = kwargs.pop("is_blocking", True) + assert kwargs.pop("src_offset", 0) == 0 + assert kwargs.pop("dest_offset", 0) == 0 + assert "byte_count" not in kwargs or kwargs.pop("byte_count") == src._size() return _cl._enqueue_svm_memcpy(queue, is_blocking, dest, src, **kwargs) else: diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index ace5e9777..a6f604ee7 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -299,6 +299,7 @@ void pyopencl_expose_part_2(py::module &m) typedef svm_arg_wrapper cls; py::class_(m, "SVM", py::dynamic_attr()) .def(py::init()) + .def("_size", &cls::size) ; } From b3b6345f99e5ab043fa59437c022cdbcfa1dfe53 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 Jul 2022 01:17:38 -0500 Subject: [PATCH 10/16] Add SVMAllocator (in Python) --- pyopencl/tools.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 27adac75b..1b027f0ff 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -67,6 +67,25 @@ def _register_types(): # }}} +# {{{ svm allocator + +# FIXME: Replace me with C++ +class SVMAllocator: + def __init__(self, ctx, flags, *, alignment=0, queue=None): + self._context = ctx + self._flags = flags + self._alignment = alignment + self._queue = queue + + def __call__(self, nbytes): + import pyopencl as cl + return cl.SVM(cl.svm_empty( + self._context, self._flags, (nbytes,), np.int8, "C", self._alignment, + self._queue)) + +# }}} + + # {{{ first-arg caches _first_arg_dependent_caches = [] From 81f06d65e9aa40db827bc595f6547aa4c96d684d Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 Jul 2022 01:18:13 -0500 Subject: [PATCH 11/16] Jostle some section headers in wrap_cl.hpp --- src/wrap_cl.hpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 2582a5103..ec6089ccc 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -137,10 +137,8 @@ // }}} +// {{{ macros and typedefs for wrappers - - -// {{{ tools #if PY_VERSION_HEX >= 0x02050000 typedef Py_ssize_t PYOPENCL_BUFFER_SIZE_T; #else @@ -262,6 +260,7 @@ // }}} + // {{{ tracing and error reporting #ifdef PYOPENCL_TRACE #define PYOPENCL_PRINT_CALL_TRACE(NAME) \ @@ -329,6 +328,7 @@ // }}} + // {{{ get_info helpers #define PYOPENCL_GET_OPAQUE_INFO(WHAT, FIRST_ARG, SECOND_ARG, CL_TYPE, TYPE) \ { \ @@ -383,6 +383,7 @@ // }}} + // {{{ event helpers -------------------------------------------------------------- #define PYOPENCL_PARSE_WAIT_FOR \ cl_uint num_events_in_wait_list = 0; \ @@ -424,7 +425,9 @@ // }}} + // {{{ equality testing + #define PYOPENCL_EQUALITY_TESTS(cls) \ bool operator==(cls const &other) const \ { return data() == other.data(); } \ @@ -432,8 +435,8 @@ { return data() != other.data(); } \ long hash() const \ { return (long) (intptr_t) data(); } -// }}} +// }}} namespace pyopencl From df4940c652e3cd43a886b8400bd3e0ff2bcbbae7 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 2 Jul 2022 00:07:27 -0500 Subject: [PATCH 12/16] Add SVM._ptr_as_int() --- src/wrap_cl_part_2.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index a6f604ee7..a2b5cde17 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -299,6 +299,7 @@ void pyopencl_expose_part_2(py::module &m) typedef svm_arg_wrapper cls; py::class_(m, "SVM", py::dynamic_attr()) .def(py::init()) + .def("_ptr_as_int", [](cls &self) { return (intptr_t) self.ptr(); }) .def("_size", &cls::size) ; } From 3abf89cc702fafc4a819866fea215d3c74253a20 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 7 Jul 2022 17:52:07 +0200 Subject: [PATCH 13/16] Move from SVM(np.ndarray(SVMAllocation)) to bare SVMAllocation --- examples/demo_array_svm.py | 23 ++--- pyopencl/__init__.py | 60 +++++------ pyopencl/tools.py | 6 +- src/wrap_cl.hpp | 200 ++++++++++++++++++++++++++++++++----- src/wrap_cl_part_2.cpp | 33 ++++-- 5 files changed, 234 insertions(+), 88 deletions(-) diff --git a/examples/demo_array_svm.py b/examples/demo_array_svm.py index 07454f1fe..054e582c0 100644 --- a/examples/demo_array_svm.py +++ b/examples/demo_array_svm.py @@ -1,36 +1,24 @@ import pyopencl as cl import pyopencl.array as cl_array +from pyopencl.tools import SVMAllocator import numpy as np import numpy.linalg as la -n = 5000000 +n = 500000 a = np.random.rand(n).astype(np.float32) b = np.random.rand(n).astype(np.float32) -class SVMAllocator: - def __init__(self, ctx, flags, alignment): - self._context = ctx - self._flags = flags - self._alignment = alignment - - def __call__(self, nbytes): - return cl.SVM(cl.svm_empty( - ctx, self._flags, (nbytes,), np.int8, "C", self._alignment)) - - ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) -alloc = SVMAllocator(ctx, - cl.svm_mem_flags.READ_WRITE | cl.svm_mem_flags.SVM_FINE_GRAIN_BUFFER, - 0) +alloc = SVMAllocator(ctx, cl.svm_mem_flags.READ_WRITE, queue=queue) a_dev = cl_array.to_device(queue, a, allocator=alloc) -print("A_DEV", a_dev.data.mem.nbytes, a_dev.data.mem.__array_interface__) +print("A_DEV", a_dev.data) b_dev = cl_array.to_device(queue, b, allocator=alloc) dest_dev = cl_array.empty_like(a_dev) -print("DEST", dest_dev.data.mem.__array_interface__) +print("DEST", dest_dev.data) prg = cl.Program(ctx, """ __kernel void sum(__global const float *a, @@ -47,6 +35,7 @@ def __call__(self, nbytes): # PROBLEM: numpy frees the temporary out of (a_dev+b_dev) before # we're done with it diff = dest_dev - (a_dev+b_dev) + if 0: diff = diff.get() np.set_printoptions(linewidth=400) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 12ae3be04..22562d1f9 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -200,11 +200,9 @@ if get_cl_header_version() >= (2, 0): from pyopencl._cl import ( # noqa - SVMAllocation, + SVMPointer, SVM, - - # FIXME - #enqueue_svm_migratemem, + SVMAllocation, ) if _cl.have_gl(): @@ -1144,25 +1142,19 @@ def memory_map_exit(self, exc_type, exc_val, exc_tb): """ if get_cl_header_version() >= (2, 0): - svmallocation_old_init = SVMAllocation.__init__ - - def svmallocation_init(self, ctx, size, alignment, flags, _interface=None, - queue=None): - """ - :arg ctx: a :class:`Context` - :arg flags: some of :class:`svm_mem_flags`. - """ - svmallocation_old_init(self, ctx, size, alignment, flags, queue) - - # mem_flags.READ_ONLY applies to kernels, not the host - read_write = True - _interface["data"] = ( - int(self._ptr_as_int()), not read_write) - - self.__array_interface__ = _interface - - if get_cl_header_version() >= (2, 0): - SVMAllocation.__init__ = svmallocation_init + class _ArrayInterfaceSVMAllocation(SVMAllocation): + def __init__(self, ctx, size, alignment, flags, _interface=None, + queue=None): + """ + :arg ctx: a :class:`Context` + :arg flags: some of :class:`svm_mem_flags`. + """ + super().__init__(ctx, size, alignment, flags, queue) + + # mem_flags.READ_ONLY applies to kernels, not the host + read_write = True + _interface["data"] = ( + int(self._ptr_as_int()), not read_write) # }}} @@ -1773,15 +1765,14 @@ def enqueue_copy(queue, dest, src, **kwargs): else: raise ValueError("invalid dest mem object type") - elif get_cl_header_version() >= (2, 0) and isinstance(dest, SVM): + elif get_cl_header_version() >= (2, 0) and isinstance(dest, SVMPointer): # to SVM - if not isinstance(src, SVM): + if not isinstance(src, SVMPointer): src = SVM(src) is_blocking = kwargs.pop("is_blocking", True) assert kwargs.pop("src_offset", 0) == 0 assert kwargs.pop("dest_offset", 0) == 0 - assert "byte_count" not in kwargs or kwargs.pop("byte_count") == src._size() return _cl._enqueue_svm_memcpy(queue, is_blocking, dest, src, **kwargs) else: @@ -1807,7 +1798,7 @@ def enqueue_copy(queue, dest, src, **kwargs): queue, src, origin, region, dest, **kwargs) else: raise ValueError("invalid src mem object type") - elif isinstance(src, SVM): + elif isinstance(src, SVMPointer): # from svm # dest is not a SVM instance, otherwise we'd be in the branch above is_blocking = kwargs.pop("is_blocking", True) @@ -1822,14 +1813,14 @@ def enqueue_copy(queue, dest, src, **kwargs): # {{{ enqueue_fill -def enqueue_fill(queue: CommandQueue, dest: Union[MemoryObjectHolder, SVM], +def enqueue_fill(queue: CommandQueue, dest: Union[MemoryObjectHolder, SVMPointer], pattern: Any, size: int, *, offset: int = 0, wait_for=None) -> Event: """ .. versionadded:: 2022.2 """ if isinstance(dest, MemoryObjectHolder): return enqueue_fill_buffer(queue, dest, pattern, offset, size, wait_for) - elif isinstance(dest, SVM): + elif isinstance(dest, SVMPointer): if offset: raise NotImplementedError("enqueue_fill with SVM does not yet support " "offsets") @@ -1961,7 +1952,7 @@ def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None): def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None): """Fill shared virtual memory with a pattern. - :arg dest: a Python buffer object, optionally wrapped in an :class:`SVM` object + :arg dest: a Python buffer object, or any implementation of :class:`SVMPointer`. :arg pattern: a Python buffer object (e.g. a :class:`numpy.ndarray` with the fill pattern to be used. :arg byte_count: The size of the memory to be fill. Defaults to the @@ -1972,8 +1963,8 @@ def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None): .. versionadded:: 2016.2 """ - if not isinstance(dest, SVM): - dest = SVM(dest) + if not isinstance(dest, SVMPointer): + dest = SVMPointer(dest) return _cl._enqueue_svm_memfill( queue, dest, pattern, byte_count=None, wait_for=None) @@ -1982,7 +1973,7 @@ def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None): def enqueue_svm_migratemem(queue, svms, flags, wait_for=None): """ :arg svms: a collection of Python buffer objects (e.g. :mod:`numpy` - arrays), optionally wrapped in :class:`SVM` objects. + arrays), or any implementation of :class:`SVMPointer`. :arg flags: a combination of :class:`mem_migration_flags` |std-enqueue-blurb| @@ -2068,7 +2059,8 @@ def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None, queue=None): if alignment is None: alignment = itemsize - svm_alloc = SVMAllocation(ctx, nbytes, alignment, flags, _interface=interface, + svm_alloc = _ArrayInterfaceSVMAllocation( + ctx, nbytes, alignment, flags, _interface=interface, queue=queue) return np.asarray(svm_alloc) diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 1b027f0ff..09e4d0566 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -79,9 +79,9 @@ def __init__(self, ctx, flags, *, alignment=0, queue=None): def __call__(self, nbytes): import pyopencl as cl - return cl.SVM(cl.svm_empty( - self._context, self._flags, (nbytes,), np.int8, "C", self._alignment, - self._queue)) + return cl.SVMAllocation( + self._context, nbytes, self._alignment, self._flags, + queue=self._queue) # }}} diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index ec6089ccc..76477f55f 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -227,8 +227,6 @@ } - - #define PYOPENCL_RETRY_IF_MEM_ERROR(OPERATION) \ { \ bool failed_with_mem_error = false; \ @@ -258,6 +256,17 @@ } \ } + +#define PYOPENCL_GET_SVM_SIZE(NAME) \ + size_t NAME##_size; \ + bool NAME##_has_size = false; \ + try \ + { \ + NAME##_size = NAME.size(); \ + NAME##_has_size = true; \ + } \ + catch (size_not_available) { } + // }}} @@ -501,12 +510,12 @@ namespace pyopencl // {{{ utility functions - inline bool is_queue_in_order(cl_command_queue queue) + inline bool is_queue_out_of_order(cl_command_queue queue) { cl_command_queue_properties param_value; PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, (queue, CL_QUEUE_PROPERTIES, sizeof(param_value), ¶m_value, 0)); - return !(param_value & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); + return param_value & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; } // }}} @@ -3527,11 +3536,26 @@ namespace pyopencl // }}} - // {{{ svm - #if PYOPENCL_CL_VERSION >= 0x2000 - class svm_arg_wrapper + // {{{ svm pointer + + class size_not_available { }; + + class svm_pointer + { + public: + virtual void *ptr() const = 0; + // may throw size_not_available + virtual size_t size() const = 0; + }; + + // }}} + + + // {{{ svm_arg_wrapper + + class svm_arg_wrapper : public svm_pointer { private: void *m_ptr; @@ -3564,12 +3588,17 @@ namespace pyopencl } }; + // }}} + - class svm_allocation + // {{{ svm_allocation + + class svm_allocation : public svm_pointer { private: std::shared_ptr m_context; void *m_allocation; + size_t m_size; command_queue_ref m_queue; // FIXME Should keep a list of events so that we can wait for users // to finish in the case of out-of-order queues. @@ -3577,12 +3606,12 @@ namespace pyopencl public: svm_allocation(std::shared_ptr const &ctx, size_t size, cl_uint alignment, cl_svm_mem_flags flags, const command_queue *queue = nullptr) - : m_context(ctx) + : m_context(ctx), m_size(size) { if (queue) { m_queue.set(queue->data()); - if (!is_queue_in_order(m_queue.data())) + if (is_queue_out_of_order(m_queue.data())) throw error("SVMAllocation.__init__", CL_INVALID_VALUE, "supplying an out-of-order queue to SVMAllocation is invalid"); } @@ -3650,6 +3679,11 @@ namespace pyopencl return m_allocation; } + size_t size() const + { + return m_size; + } + intptr_t ptr_as_int() const { return (intptr_t) m_allocation; @@ -3667,29 +3701,92 @@ namespace pyopencl void bind_to_queue(command_queue const &queue) { + if (is_queue_out_of_order(m_queue.data())) + throw error("SVMAllocation.bind_to_queue", CL_INVALID_VALUE, + "supplying an out-of-order queue to SVMAllocation is invalid"); + + if (m_queue.is_valid()) + { + // make sure synchronization promises stay valid in new queue + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueMarker, (m_queue.data(), &evt)); + PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, (queue.data(), 1, &evt)); + } + m_queue.set(queue.data()); } void unbind_from_queue() { + // NOTE: This absolves the allocation from any synchronization promises + // made. Keeping those before calling this method is the responsibility + // of the user. m_queue.reset(); } }; + // }}} + + + // {{{ svm operations inline event *enqueue_svm_memcpy( command_queue &cq, cl_bool is_blocking, - svm_arg_wrapper &dst, svm_arg_wrapper &src, - py::object py_wait_for + svm_pointer &dst, svm_pointer &src, + py::object py_wait_for, + py::object byte_count_py ) { PYOPENCL_PARSE_WAIT_FOR; - if (src.size() != dst.size()) + // {{{ process size + + PYOPENCL_GET_SVM_SIZE(src); + PYOPENCL_GET_SVM_SIZE(dst); + + size_t size; + bool have_size = false; + + if (src_has_size) + { + size = src_size; + have_size = true; + } + if (dst_has_size) + { + if (have_size) + { + if (!byte_count_py.is_none()) + size = std::min(size, dst_size); + else if (size != dst_size) + throw error("_enqueue_svm_memcpy", CL_INVALID_VALUE, + "sizes of source and destination buffer do not match"); + } + else + { + size = dst_size; + have_size = true; + } + } + + if (!byte_count_py.is_none()) + { + size_t byte_count = byte_count_py.cast(); + if (have_size && byte_count > size) + throw error("_enqueue_svm_memcpy", CL_INVALID_VALUE, + "specified byte_count larger than size of source or destination buffers"); + size = byte_count; + have_size = true; + } + + if (!have_size) throw error("_enqueue_svm_memcpy", CL_INVALID_VALUE, - "sizes of source and destination buffer do not match"); + "size not passed and could not be determined"); + + // }}} cl_event evt; PYOPENCL_CALL_GUARDED( @@ -3698,7 +3795,7 @@ namespace pyopencl cq.data(), is_blocking, dst.ptr(), src.ptr(), - dst.size(), + size, PYOPENCL_WAITLIST_ARGS, &evt )); @@ -3710,7 +3807,7 @@ namespace pyopencl inline event *enqueue_svm_memfill( command_queue &cq, - svm_arg_wrapper &dst, py::object py_pattern, + svm_pointer &dst, py::object py_pattern, py::object byte_count, py::object py_wait_for ) @@ -3727,9 +3824,32 @@ namespace pyopencl pattern_ptr = pattern_ward->m_buf.buf; pattern_len = pattern_ward->m_buf.len; - size_t fill_size = dst.size(); + // {{{ process size + + PYOPENCL_GET_SVM_SIZE(dst); + + size_t size; + bool have_size = false; + if (dst_has_size) + { + size = dst_size; + have_size = true; + } if (!byte_count.is_none()) - fill_size = py::cast(byte_count); + { + size_t user_size = py::cast(byte_count); + if (have_size && user_size > size) + throw error("enqueue_svm_memfill", CL_INVALID_VALUE, + "byte_count too large for specified SVM buffer"); + } + + if (!have_size) + { + throw error("enqueue_svm_memfill", CL_INVALID_VALUE, + "byte_count not passed and could not be determined"); + } + + // }}} cl_event evt; PYOPENCL_CALL_GUARDED( @@ -3738,7 +3858,7 @@ namespace pyopencl cq.data(), dst.ptr(), pattern_ptr, pattern_len, - fill_size, + size, PYOPENCL_WAITLIST_ARGS, &evt )); @@ -3752,12 +3872,40 @@ namespace pyopencl command_queue &cq, cl_bool is_blocking, cl_map_flags flags, - svm_arg_wrapper &svm, - py::object py_wait_for + svm_pointer &svm, + py::object py_wait_for, + py::object user_size_py ) { PYOPENCL_PARSE_WAIT_FOR; + // {{{ process size + + PYOPENCL_GET_SVM_SIZE(svm); + + size_t size; + bool have_size = false; + if (svm_has_size) + { + size = svm_size; + have_size = true; + } + if (!user_size_py.is_none()) + { + size_t user_size = py::cast(user_size_py); + if (have_size && user_size > size) + throw error("enqueue_svm_memfill", CL_INVALID_VALUE, + "user-provided size too large for specified SVM buffer"); + } + + if (!have_size) + { + throw error("enqueue_svm_mem_map", CL_INVALID_VALUE, + "size not passed and could not be determined"); + } + + // }}} + cl_event evt; PYOPENCL_CALL_GUARDED( clEnqueueSVMMap, @@ -3765,7 +3913,7 @@ namespace pyopencl cq.data(), is_blocking, flags, - svm.ptr(), svm.size(), + svm.ptr(), size, PYOPENCL_WAITLIST_ARGS, &evt )); @@ -3777,7 +3925,7 @@ namespace pyopencl inline event *enqueue_svm_unmap( command_queue &cq, - svm_arg_wrapper &svm, + svm_pointer &svm, py::object py_wait_for ) { @@ -3814,7 +3962,7 @@ namespace pyopencl for (py::handle py_svm: svms) { - svm_arg_wrapper &svm(py::cast(py_svm)); + svm_pointer &svm(py::cast(py_svm)); svm_pointers.push_back(svm.ptr()); sizes.push_back(svm.size()); @@ -4609,7 +4757,7 @@ namespace pyopencl } #if PYOPENCL_CL_VERSION >= 0x2000 - void set_arg_svm(cl_uint arg_index, svm_arg_wrapper const &wrp) + void set_arg_svm(cl_uint arg_index, svm_pointer const &wrp) { PYOPENCL_CALL_GUARDED(clSetKernelArgSVMPointer, (m_kernel, arg_index, wrp.ptr())); @@ -4634,7 +4782,7 @@ namespace pyopencl #if PYOPENCL_CL_VERSION >= 0x2000 try { - set_arg_svm(arg_index, arg.cast()); + set_arg_svm(arg_index, arg.cast()); return; } catch (py::cast_error &) { } diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index a2b5cde17..453f34c38 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -295,18 +295,34 @@ void pyopencl_expose_part_2(py::module &m) // {{{ svm #if PYOPENCL_CL_VERSION >= 0x2000 + { + typedef svm_pointer cls; + py::class_(m, "SVMPointer", py::dynamic_attr()) + .def("_ptr_as_int", [](cls &self) { return (intptr_t) self.ptr(); }) + .def("_size", [](cls &self) -> py::object + { + try + { + return py::cast(self.size()); + } + catch (size_not_available) + { + return py::none(); + } + }) + ; + } + { typedef svm_arg_wrapper cls; - py::class_(m, "SVM", py::dynamic_attr()) + py::class_(m, "SVM", py::dynamic_attr()) .def(py::init()) - .def("_ptr_as_int", [](cls &self) { return (intptr_t) self.ptr(); }) - .def("_size", &cls::size) ; } { typedef svm_allocation cls; - py::class_(m, "SVMAllocation", py::dynamic_attr()) + py::class_(m, "SVMAllocation", py::dynamic_attr()) .def(py::init, size_t, cl_uint, cl_svm_mem_flags, const command_queue *>(), py::arg("context"), py::arg("size"), @@ -318,10 +334,9 @@ void pyopencl_expose_part_2(py::module &m) .def("enqueue_release", &cls::enqueue_release, ":returns: a :class:`pyopencl.Event`\n\n" "|std-enqueue-blurb|") - .def("_ptr_as_int", &cls::ptr_as_int) .def(py::self == py::self) .def(py::self != py::self) - .def("__hash__", &cls::ptr_as_int) + .def("__hash__", [](cls &self) { return (intptr_t) self.ptr(); }) .DEF_SIMPLE_METHOD(bind_to_queue) .DEF_SIMPLE_METHOD(unbind_from_queue) ; @@ -332,7 +347,8 @@ void pyopencl_expose_part_2(py::module &m) py::arg("is_blocking"), py::arg("dst"), py::arg("src"), - py::arg("wait_for")=py::none() + py::arg("wait_for")=py::none(), + py::arg("byte_count")=py::none() ); m.def("_enqueue_svm_memfill", enqueue_svm_memfill, @@ -348,7 +364,8 @@ void pyopencl_expose_part_2(py::module &m) py::arg("is_blocking"), py::arg("flags"), py::arg("svm"), - py::arg("wait_for")=py::none() + py::arg("wait_for")=py::none(), + py::arg("size")=py::none() ); m.def("_enqueue_svm_unmap", enqueue_svm_unmap, From c34f7ef958dac5f915b2144c851a252b3e3cd7a6 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Thu, 21 Jul 2022 22:08:05 -0500 Subject: [PATCH 14/16] initial SVMMemoryPool implementation --- pyopencl/tools.py | 4 +- src/wrap_mempool.cpp | 114 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+), 1 deletion(-) diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 09e4d0566..9389bcb0a 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -62,7 +62,9 @@ def _register_types(): PooledBuffer as PooledBuffer, _tools_DeferredAllocator as DeferredAllocator, _tools_ImmediateAllocator as ImmediateAllocator, - MemoryPool as MemoryPool) + _tools_SVMAllocator as SVMAllocator, + MemoryPool as MemoryPool, + SVMMemoryPool as SVMMemoryPool) # }}} diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp index 35630b036..875c72f4a 100644 --- a/src/wrap_mempool.cpp +++ b/src/wrap_mempool.cpp @@ -349,11 +349,95 @@ namespace { pyopencl::run_python_gc(); } + + bool is_deferred() const { return false; } + + svm_allocator *copy() const + { + return new svm_allocator(*this); + } }; // }}} + // {{{ SVM mempool + + inline + void *svm_allocator_call(svm_allocator &alloc, size_t size) + { + void * mem; + int try_count = 0; + while (try_count < 2) + { + try + { + mem = alloc.allocate(size); + break; + } + catch (pyopencl::error &e) + { + if (!e.is_out_of_memory()) + throw; + if (++try_count == 2) + throw; + } + + alloc.try_release_blocks(); + } + + if (!mem) + { + if (size == 0) + return nullptr; + else + throw pyopencl::error("Allocator", CL_INVALID_VALUE, + "svm allocator succeeded but returned NULL pointer"); + } + + return mem; + } + + // }}} + + + // {{{ pooled_buffer + + class svm_pooled_buffer + : public pyopencl::pooled_allocation > //, + // public pyopencl::memory_object_holder + { + private: + typedef + pyopencl::pooled_allocation > + super; + + public: + svm_pooled_buffer( + std::shared_ptr p, super::size_type s) + : super(p, s) + { } + + const super::pointer_type data() const + { return ptr(); } + }; + + // }}} + + + // {{{{ device_pool_allocate + + svm_pooled_buffer *svm_device_pool_allocate( + std::shared_ptr > pool, + pyopencl::memory_pool::size_type sz) + { + return new svm_pooled_buffer(pool, sz); + } + + + // }}} + + template @@ -454,6 +538,36 @@ void pyopencl_expose_mempool(py::module &m) expose_memory_pool(wrapper); } + { + typedef svm_allocator cls; + py::class_ wrapper( + m, "_tools_SVMAllocator"); + wrapper + .def(py::init()) + .def(py::init const &, cl_uint, cl_mem_flags>()) + ; + } + + { + typedef pyopencl::memory_pool cls; + + py::class_< + cls, /* boost::noncopyable, */ + std::shared_ptr> wrapper( m, "SVMMemoryPool"); + wrapper + .def(py::init(), + py::arg("allocator"), + py::arg("leading_bits_in_bin_id")=4 + ) + .def("allocate", svm_device_pool_allocate) + .def("__call__", svm_device_pool_allocate) + // undoc for now + .DEF_SIMPLE_METHOD(set_trace) + ; + + expose_memory_pool(wrapper); + } + { typedef pooled_buffer cls; py::class_ Date: Thu, 21 Jul 2022 23:09:06 -0500 Subject: [PATCH 15/16] make SVMAllocator C++-only --- pyopencl/tools.py | 26 +++++++++++++------------- src/wrap_mempool.cpp | 12 +++++++++--- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 9389bcb0a..782b6af5f 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -71,19 +71,19 @@ def _register_types(): # {{{ svm allocator -# FIXME: Replace me with C++ -class SVMAllocator: - def __init__(self, ctx, flags, *, alignment=0, queue=None): - self._context = ctx - self._flags = flags - self._alignment = alignment - self._queue = queue - - def __call__(self, nbytes): - import pyopencl as cl - return cl.SVMAllocation( - self._context, nbytes, self._alignment, self._flags, - queue=self._queue) +# # FIXME: Replace me with C++ +# class SVMAllocator: +# def __init__(self, ctx, flags, *, alignment=0, queue=None): +# self._context = ctx +# self._flags = flags +# self._alignment = alignment +# self._queue = queue + +# def __call__(self, nbytes): +# import pyopencl as cl +# return cl.SVMAllocation( +# self._context, nbytes, self._alignment, self._flags, +# queue=self._queue) # }}} diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp index 875c72f4a..15691d612 100644 --- a/src/wrap_mempool.cpp +++ b/src/wrap_mempool.cpp @@ -307,13 +307,15 @@ namespace { protected: std::shared_ptr m_context; + pyopencl::command_queue m_queue; cl_uint m_alignment; cl_mem_flags m_flags; public: svm_allocator(std::shared_ptr const &ctx, + pyopencl::command_queue &queue, cl_uint alignment, cl_mem_flags flags=CL_MEM_READ_WRITE) - : m_context(ctx), m_alignment(alignment), m_flags(flags) + : m_context(ctx), m_queue(queue), m_alignment(alignment), m_flags(flags) { if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) throw pyopencl::error("Allocator", CL_INVALID_VALUE, @@ -321,7 +323,7 @@ namespace } svm_allocator(svm_allocator const &src) - : m_context(src.m_context), m_alignment(src.m_alignment), + : m_context(src.m_context), m_queue(src.m_queue), m_alignment(src.m_alignment), m_flags(src.m_flags) { } @@ -544,7 +546,11 @@ void pyopencl_expose_mempool(py::module &m) m, "_tools_SVMAllocator"); wrapper .def(py::init()) - .def(py::init const &, cl_uint, cl_mem_flags>()) + .def(py::init const &, pyopencl::command_queue &, cl_uint, cl_mem_flags>(), + py::arg("ctx"), + py::arg("queue"), + py::arg("alignment")=0, + py::arg("flags")=CL_MEM_READ_WRITE) ; } From 437dbc36863944431877761921dc38a500223cf1 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Thu, 21 Jul 2022 23:45:55 -0500 Subject: [PATCH 16/16] svm_pooled_allocation --- pyopencl/tools.py | 1 + src/wrap_mempool.cpp | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 782b6af5f..0cc707cfe 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -60,6 +60,7 @@ def _register_types(): from pyopencl._cl import ( # noqa PooledBuffer as PooledBuffer, + SVMPooledAllocation as SVMPooledAllocation, _tools_DeferredAllocator as DeferredAllocator, _tools_ImmediateAllocator as ImmediateAllocator, _tools_SVMAllocator as SVMAllocator, diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp index 15691d612..85bb287be 100644 --- a/src/wrap_mempool.cpp +++ b/src/wrap_mempool.cpp @@ -305,7 +305,7 @@ namespace // allocators? (I.e. can I tell whether I am out of memory just from allocations?) class svm_allocator { - protected: + public: std::shared_ptr m_context; pyopencl::command_queue m_queue; cl_uint m_alignment; @@ -366,7 +366,7 @@ namespace // {{{ SVM mempool inline - void *svm_allocator_call(svm_allocator &alloc, size_t size) + pyopencl::svm_allocation *svm_allocator_call(svm_allocator &alloc, size_t size) { void * mem; int try_count = 0; @@ -397,7 +397,8 @@ namespace "svm allocator succeeded but returned NULL pointer"); } - return mem; + + return new pyopencl::svm_allocation(alloc.m_context, size, alloc.m_alignment, alloc.m_flags, &alloc.m_queue); } // }}} @@ -405,7 +406,7 @@ namespace // {{{ pooled_buffer - class svm_pooled_buffer + class svm_pooled_allocation : public pyopencl::pooled_allocation > //, // public pyopencl::memory_object_holder { @@ -415,7 +416,7 @@ namespace super; public: - svm_pooled_buffer( + svm_pooled_allocation( std::shared_ptr p, super::size_type s) : super(p, s) { } @@ -429,11 +430,11 @@ namespace // {{{{ device_pool_allocate - svm_pooled_buffer *svm_device_pool_allocate( + svm_pooled_allocation *svm_device_pool_allocate( std::shared_ptr > pool, pyopencl::memory_pool::size_type sz) { - return new svm_pooled_buffer(pool, sz); + return new svm_pooled_allocation(pool, sz); } @@ -574,6 +575,14 @@ void pyopencl_expose_mempool(py::module &m) expose_memory_pool(wrapper); } + { + typedef svm_pooled_allocation cls; + py::class_( + m, "SVMPooledAllocation"/* , py::no_init */) + .def("release", &cls::free) + ; + } + { typedef pooled_buffer cls; py::class_