From 7c0a99c69937311224897d8d2929efc98e683b1c Mon Sep 17 00:00:00 2001 From: Dmitriy Sobolev Date: Mon, 10 Feb 2025 16:38:26 +0000 Subject: [PATCH] Make _ONEDPL_SYCL121_GROUP_BARRIER internal --- documentation/library_guide/macros.rst | 10 ---------- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 12 ++++++------ 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/documentation/library_guide/macros.rst b/documentation/library_guide/macros.rst index 4967fc9fb23..e9597867e80 100644 --- a/documentation/library_guide/macros.rst +++ b/documentation/library_guide/macros.rst @@ -135,14 +135,4 @@ Macro Description .. Note:: Define ``ONEDPL_FPGA_DEVICE`` and ``ONEDPL_FPGA_EMULATOR`` macros in the same application to run on a FPGA emulation device. Define only the ``ONEDPL_FPGA_DEVICE`` macro to run on a FPGA hardware device. ----------------------------------- ------------------------------ -``ONEDPL_SYCL121_GROUP_BARRIER`` The macro controls which API for group barriers oneDPL uses, - which can be either defined as in SYCL 1.2.1 or as in SYCL 2020. - It affects algorithms that use device execution policies. - - Set this macro to a non-zero value to enable SYCL 1.2.1 group barriers. - The default value is 1 when using the oneAPI DPC++ Compiler and 0 otherwise. - - .. Note:: Depending on a GPU driver, SYCL 1.2.1 group barriers can provide better performance - on Intel GPUs. The default value may change in future releases in favor of SYCL 2020 group barriers. ==================================== ============================== diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index c72ec71d231..90347b48766 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -232,28 +232,28 @@ __get_accessor_size(const _Accessor& __accessor) // The performance gap is negligible since // https://github.com/intel/intel-graphics-compiler/commit/ed639f68d142bc963a7b626badc207a42fb281cb (Aug 20, 2024) // But the fix is not a part of the LTS GPU drivers (Linux) yet. -#if !defined(ONEDPL_SYCL121_GROUP_BARRIER) +#if !defined(_ONEDPL_SYCL121_GROUP_BARRIER) # if _ONEDPL_LIBSYCL_VERSION -# define ONEDPL_SYCL121_GROUP_BARRIER 1 +# define _ONEDPL_SYCL121_GROUP_BARRIER 1 # else -# define ONEDPL_SYCL121_GROUP_BARRIER 0 +# define _ONEDPL_SYCL121_GROUP_BARRIER 0 # endif #endif -#if ONEDPL_SYCL121_GROUP_BARRIER +#if _ONEDPL_SYCL121_GROUP_BARRIER inline constexpr sycl::access::fence_space __fence_space_local = sycl::access::fence_space::local_space; inline constexpr sycl::access::fence_space __fence_space_global = sycl::access::fence_space::global_space; #else struct __fence_space_dummy{}; // No-op dummy type since SYCL 2020 does not specify memory fence spaces in group barriers inline constexpr __fence_space_dummy __fence_space_local{}; inline constexpr __fence_space_dummy __fence_space_global{}; -#endif // ONEDPL_SYCL121_GROUP_BARRIER +#endif // _ONEDPL_SYCL121_GROUP_BARRIER template void __group_barrier(_Item __item, [[maybe_unused]] _Space __space = __fence_space_local) { -#if ONEDPL_SYCL121_GROUP_BARRIER +#if _ONEDPL_SYCL121_GROUP_BARRIER __item.barrier(__space); #elif _ONEDPL_SYCL2020_GROUP_BARRIER_PRESENT sycl::group_barrier(__item.get_group(), sycl::memory_scope::work_group);