Merge pull request #3690 from kif/eventless_profiling

Eventless OpenCL profiling
silx-kit · Oct 10, 2022 · 96d8f1a · 96d8f1a
2 parents 6143948 + 4486077
commit 96d8f1a
Show file tree

Hide file tree

Showing 2 changed files with 85 additions and 42 deletions.
diff --git a/src/silx/opencl/processing.py b/src/silx/opencl/processing.py
@@ -37,7 +37,7 @@
 __contact__ = "[email protected]"
 __license__ = "MIT"
 __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "02/03/2021"
+__date__ = "06/10/2022"
 __status__ = "stable"
 
 import sys
@@ -52,7 +52,8 @@
 import platform
 
 BufferDescription = namedtuple("BufferDescription", ["name", "size", "dtype", "flags"])
-EventDescription = namedtuple("EventDescription", ["name", "event"])
+EventDescription = namedtuple("EventDescription", ["name", "event"])  # Deprecated, please use ProfileDescription
+ProfileDescription = namedtuple("ProfileDescription", ["name", "start", "stop"])
 
 logger = logging.getLogger(__name__)
 
@@ -285,6 +286,7 @@ def free_kernels(self):
         self.kernels = None
         self.program = None
 
+# Methods about Profiling
     def set_profiling(self, value=True):
         """Switch On/Off the profiling flag of the command queue to allow debugging
 
@@ -308,37 +310,42 @@ def profile_add(self, event, desc):
         """
         Add an OpenCL event to the events lists, if profiling is enabled.
 
-        :param event: silx.opencl.processing.EventDescription.
+        :param event: pyopencl.NannyEvent.
         :param desc: event description
         """
         if self.profile:
-            self.events.append(EventDescription(desc, event))
-
-    def allocate_texture(self, shape, hostbuf=None, support_1D=False):
-        return allocate_texture(self.ctx, shape, hostbuf=hostbuf, support_1D=support_1D)
+            try:
+                profile = event.profile
+                self.events.append(ProfileDescription(desc, profile.start, profile.end))
+            except Exception:
+                # Probably the driver does not support profiling
+                pass
 
-    def transfer_to_texture(self, arr, tex_ref):
+    def profile_multi(self, event_lists):
         """
-        Transfer an array to a texture.
+        Extract profiling info from several OpenCL events, if profiling is enabled.
 
-        :param arr: Input array. Can be a numpy array or a pyopencl array.
-        :param tex_ref: texture reference (pyopencl._cl.Image).
+        :param event_lists: list of ("desc", pyopencl.NannyEvent).
         """
-        copy_args = [self.queue, tex_ref, arr]
-        shp = arr.shape
-        ndim = arr.ndim
-        if ndim == 1:
-            # pyopencl and OpenCL < 1.2 do not support image1d_t
-            # force 2D with one row in this case
-            # ~ ndim = 2
-            shp = (1,) + shp
-        copy_kwargs = {"origin":(0,) * ndim, "region": shp[::-1]}
-        if not(isinstance(arr, numpy.ndarray)):  # assuming pyopencl.array.Array
-            # D->D copy
-            copy_args[2] = arr.data
-            copy_kwargs["offset"] = 0
-        ev = pyopencl.enqueue_copy(*copy_args, **copy_kwargs)
-        self.profile_add(ev, "Transfer to texture")
+        if self.profile:
+            for event_desc in event_lists:
+                if isinstance(event_desc, ProfileDescription):
+                    self.events.append(event_desc)
+                else:
+                    if isinstance(event_desc, EventDescription) or "__len__" in dir(event_desc) and len(event_desc) == 2:
+                        desc, event = event_desc
+                    else:
+                        desc = "?"
+                        event = event_desc
+                    try:
+                        profile = event.profile
+                        start = profile.start
+                        end = profile.end
+                    except Exception:
+                        # probably an unfinished job ... use old-style.
+                        self.events.append(event_desc)
+                    else:
+                        self.events.append(ProfileDescription(desc, start, end))
 
     def log_profile(self, stats=False):
         """If we are in profiling mode, prints out all timing for every single OpenCL call
@@ -358,20 +365,29 @@ def log_profile(self, stats=False):
 
         if self.profile:
             for e in self.events:
-                if "__len__" in dir(e) and len(e) >= 2:
+                if isinstance(e, ProfileDescription):
+                    name = e[0]
+                    t0 = e[1]
+                    t1 = e[2]
+                elif isinstance(e, EventDescription) or "__len__" in dir(e) and len(e) == 2:
                     name = e[0]
                     pr = e[1].profile
                     t0 = pr.start
                     t1 = pr.end
-                    et = 1e-6 * (t1 - t0)
-                    total_time += et
-                    if stats is None:
-                        out.append(f"{name:>50}        : {et:.3f}ms")
+                else:
+                    name = "?"
+                    t0 = e.profile.start
+                    t1 = e.profile.end
+
+                et = 1e-6 * (t1 - t0)
+                total_time += et
+                if stats is None:
+                    out.append(f"{name:>50}        : {et:.3f}ms")
+                else:
+                    if name in stats:
+                        stats[name].append(et)
                     else:
-                        if name in stats:
-                            stats[name].append(et)
-                        else:
-                            stats[name] = [et]
+                        stats[name] = [et]
             if stats is not None:
                 for k, v in stats.items():
                     n = numpy.array(v)
@@ -389,6 +405,33 @@ def reset_log(self):
         with self.sem:
             self.events = []
 
+# Methods about textures
+    def allocate_texture(self, shape, hostbuf=None, support_1D=False):
+        return allocate_texture(self.ctx, shape, hostbuf=hostbuf, support_1D=support_1D)
+
+    def transfer_to_texture(self, arr, tex_ref):
+        """
+        Transfer an array to a texture.
+
+        :param arr: Input array. Can be a numpy array or a pyopencl array.
+        :param tex_ref: texture reference (pyopencl._cl.Image).
+        """
+        copy_args = [self.queue, tex_ref, arr]
+        shp = arr.shape
+        ndim = arr.ndim
+        if ndim == 1:
+            # pyopencl and OpenCL < 1.2 do not support image1d_t
+            # force 2D with one row in this case
+            # ~ ndim = 2
+            shp = (1,) + shp
+        copy_kwargs = {"origin":(0,) * ndim, "region": shp[::-1]}
+        if not(isinstance(arr, numpy.ndarray)):  # assuming pyopencl.array.Array
+            # D->D copy
+            copy_args[2] = arr.data
+            copy_kwargs["offset"] = 0
+        ev = pyopencl.enqueue_copy(*copy_args, **copy_kwargs)
+        self.profile_add(ev, "Transfer to texture")
+
     @property
     def x87_volatile_option(self):
         # this is running 32 bits OpenCL with POCL

diff --git a/src/silx/opencl/sift/test/test_image_setup.py b/src/silx/opencl/sift/test/test_image_setup.py
@@ -35,7 +35,7 @@
 __contact__ = "[email protected]"
 __license__ = "MIT"
 __copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "25/06/2018"
+__date__ = "06/10/2022"
 
 import numpy
 try:
@@ -59,8 +59,8 @@ def my_blur(img, sigma):
     x = numpy.arange(ksize) - (ksize - 1.0) / 2.0
     gaussian = numpy.exp(-(x / sigma) ** 2 / 2.0).astype(numpy.float32)
     gaussian /= gaussian.sum(dtype=numpy.float32)
-    tmp1 = scipy.ndimage.filters.convolve1d(img, gaussian, axis=-1, mode="reflect")
-    return scipy.ndimage.filters.convolve1d(tmp1, gaussian, axis=0, mode="reflect")
+    tmp1 = scipy.ndimage.convolve1d(img, gaussian, axis=-1, mode="reflect")
+    return scipy.ndimage.convolve1d(tmp1, gaussian, axis=0, mode="reflect")
 
 
 def local_maxmin_setup():
@@ -92,7 +92,7 @@ def local_maxmin_setup():
         # Blurs and DoGs pre-allocating
         g = (numpy.zeros(6 * height * width).astype(numpy.float32)).reshape(6, height, width)  # vector of 6 blurs
         DOGS = numpy.zeros((5, height, width), dtype=numpy.float32)  # vector of 5 DoGs
-        g[0, :, :] = numpy.copy(l)
+        g[0,:,:] = numpy.copy(l)
         '''
         sift.cpp pre-process
         '''
@@ -105,9 +105,9 @@ def local_maxmin_setup():
             # Convolving initial image to achieve std = initsigma = 1.6
             if (initsigma > cursigma):
                 sigma = numpy.sqrt(initsigma ** 2 - cursigma ** 2)
-                g[0, :, :] = my_blur(l, sigma)
+                g[0,:,:] = my_blur(l, sigma)
         else:
-            g[0, :, :] = numpy.copy(l)
+            g[0,:,:] = numpy.copy(l)
         '''
         Blurs and DoGs
         '''
@@ -152,7 +152,7 @@ def orientation_setup():
     # actual_nb_keypoints = numpy.int32(len((keypoints_prev[:,0])[keypoints_prev[:,1] != -1]))
     ref = numpy.copy(keypoints_prev)
     # There are actually less than "actual_nb_keypoints" keypoints ("holes" in the vector), but we can use it as a boundary
-    for i, k in enumerate(ref[:actual_nb_keypoints, :]):
+    for i, k in enumerate(ref[:actual_nb_keypoints,:]):
         ref[i] = my_interp_keypoint(DOGS, s, k[1], k[2], 5, peakthresh, width, height)
 
     grad, ori = my_gradient(blur)  # gradient is applied on blur[s]