Skip to content

Commit

Permalink
Resolve ESIMD sort KT compilation failures with new compiler (#1790)
Browse files Browse the repository at this point in the history
Two issues are fixed which cause compilation issues with newer DPC++ compilers:
* Unnecessary use of template keyword
* Usage of now removed sycl::ext::intel::esimd::sw_barrier fence.
Removing a single fence which should be safe to remove within the inter work-group communication caused hangs. This fence has been documented along with a macro guard, and a TODO has been made to address in the future.
---------

Signed-off-by: Matthew Michel <[email protected]>
  • Loading branch information
mmichel11 authored Aug 26, 2024
1 parent d612eb8 commit f786e0d
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ __one_wg_kernel(sycl::nd_item<1> __idx, ::std::uint32_t __n, _RngPack1&& __rng_p
{
__dpl_esimd::__ns::simd<::std::uint16_t, __data_per_step> __bins_uw =
__bins.template select<__data_per_step, 1>(__s);
__write_addr.template select<__data_per_step, 1>(__s) += __bin_offset.template iselect(__bins_uw);
__write_addr.template select<__data_per_step, 1>(__s) += __bin_offset.iselect(__bins_uw);
}

// 2.6. Reorder keys in SLM.
Expand Down Expand Up @@ -480,10 +480,6 @@ struct __radix_sort_onesweep_kernel
static inline __dpl_esimd::__ns::simd<::std::uint32_t, 32>
__match_bins(const __dpl_esimd::__ns::simd<::std::uint32_t, 32>& __bins, ::std::uint32_t __local_tid)
{
// Software barriers (here and below) are used in order to trick the compiler,
// thus it generates memory operations with better performance
// TODO: check if it is still necessary.
__dpl_esimd::__ns::fence<__dpl_esimd::__ns::fence_mask::sw_barrier>();
__dpl_esimd::__ns::simd<::std::uint32_t, 32> __matched_bins(0xffffffff);
_ONEDPL_PRAGMA_UNROLL
for (int __i = 0; __i < __radix_bits; __i++)
Expand All @@ -494,7 +490,6 @@ struct __radix_sort_onesweep_kernel
::std::uint32_t __ones = __dpl_esimd::__ns::pack_mask(__bit != 0);
__matched_bins = __matched_bins & (__x ^ __ones);
}
__dpl_esimd::__ns::fence<__dpl_esimd::__ns::fence_mask::sw_barrier>();
return __matched_bins;
}

Expand Down Expand Up @@ -644,10 +639,13 @@ struct __radix_sort_onesweep_kernel
_GlobOffsetT, __bin_width, __dpl_esimd::__ens::lsc_data_size::default_size,
__dpl_esimd::__ens::cache_hint::uncached, __dpl_esimd::__ens::cache_hint::cached>(
__p_prev_group_hist + __local_tid * __bin_width);
// Software barrier is used in order to motivate the compiler
// to generate memory operations in an order which results in better performance
// TODO: check if it is still necessary.
__dpl_esimd::__ns::fence<__dpl_esimd::__ns::fence_mask::sw_barrier>();
// TODO: This fence is added to prevent a hang that occurs otherwise. However, this fence
// should not logically be needed. Consider removing once this has been further investigated.
// This preprocessor check is set to expire and needs to be reevaluated once the SYCL major version
// is upgraded to 9.
#if _ONEDPL_LIBSYCL_VERSION < 90000
__dpl_esimd::__ns::fence<__dpl_esimd::__ns::memory_kind::local>();
#endif
} while (((__prev_group_hist & __hist_updated) == 0).any());
__prev_group_hist_sum.merge(__prev_group_hist_sum + __prev_group_hist, __is_not_accumulated);
__is_not_accumulated = (__prev_group_hist_sum & __global_accumulated) == 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@ __create_simd(_T initial, _T step)
{
__dpl_esimd::__ns::simd<_T, _N> ret;
ret.template select<16, 1>(0) = __dpl_esimd::__ns::simd<_T, 16>(0, 1) * step + initial;
__dpl_esimd::__ns::fence<__dpl_esimd::__ns::fence_mask::sw_barrier>();
_ONEDPL_PRAGMA_UNROLL
for (int pos = 16; pos < _N; pos += 16)
{
Expand Down

0 comments on commit f786e0d

Please sign in to comment.