diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 96f96897f84..17eb905ecf8 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1134,17 +1134,18 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen using _ScanInputTransform = oneapi::dpl::__internal::__no_op; using _WriteOp = oneapi::dpl::__par_backend_hetero::__simple_write_to_id; - auto __opt_return = __handle_sync_sycl_exception( - [=, &__exec] { - _GenInput __gen_transform{__unary_op}; - return __parallel_transform_reduce_then_scan( - __backend_tag, __exec, std::move(__in_rng), std::move(__out_rng), __gen_transform, __binary_op, - __gen_transform, _ScanInputTransform{}, _WriteOp{}, __init, _Inclusive{}, - /*_IsUniquePattern=*/std::false_type{}); - }, - __bypass_sycl_kernel_not_supported{}); - if (__opt_return) - return __opt_return.value(); + try + { + _GenInput __gen_transform{__unary_op}; + return __parallel_transform_reduce_then_scan(__backend_tag, __exec, __in_rng, __out_rng, + __gen_transform, __binary_op, __gen_transform, + _ScanInputTransform{}, _WriteOp{}, __init, _Inclusive{}, + /*_IsUniquePattern=*/std::false_type{}); + } + catch (const sycl::exception& e) + { + __bypass_sycl_kernel_not_supported(e); + } } #endif } @@ -1295,17 +1296,18 @@ __parallel_unique_copy(oneapi::dpl::__internal::__device_backend_tag __backend_t #if _ONEDPL_COMPILE_KERNEL if (oneapi::dpl::__par_backend_hetero::__is_gpu_with_sg_32(__exec)) { - auto __opt_return = __handle_sync_sycl_exception( - [=, &__exec] { - using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_unique_mask<_BinaryPredicate>; - using _WriteOp = oneapi::dpl::__par_backend_hetero::__write_to_id_if<1, _Assign>; - return __parallel_reduce_then_scan_copy(__backend_tag, __exec, std::move(__rng), std::move(__result), - __n, _GenMask{__pred}, _WriteOp{_Assign{}}, - /*_IsUniquePattern=*/std::true_type{}); - }, - __bypass_sycl_kernel_not_supported{}); - if (__opt_return) - return __opt_return.value(); + try + { + using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_unique_mask<_BinaryPredicate>; + using _WriteOp = oneapi::dpl::__par_backend_hetero::__write_to_id_if<1, _Assign>; + return __parallel_reduce_then_scan_copy(__backend_tag, __exec, __rng, __result, __n, _GenMask{__pred}, + _WriteOp{_Assign{}}, + /*_IsUniquePattern=*/std::true_type{}); + } + catch (const sycl::exception& e) + { + __bypass_sycl_kernel_not_supported(e); + } } #endif using _ReduceOp = std::plus; @@ -1344,8 +1346,8 @@ __parallel_reduce_by_segment_reduce_then_scan(oneapi::dpl::__internal::__device_ assert(__n > 1); return __parallel_transform_reduce_then_scan( __backend_tag, std::forward<_ExecutionPolicy>(__exec), - oneapi::dpl::__ranges::make_zip_view(std::forward<_Range1>(__keys), std::forward<_Range2>(__values)), - oneapi::dpl::__ranges::make_zip_view(std::forward<_Range3>(__out_keys), std::forward<_Range4>(__out_values)), + oneapi::dpl::__ranges::zip_view(std::forward<_Range1>(__keys), std::forward<_Range2>(__values)), + oneapi::dpl::__ranges::zip_view(std::forward<_Range3>(__out_keys), std::forward<_Range4>(__out_values)), _GenReduceInput{__binary_pred}, _ReduceOp{__binary_op}, _GenScanInput{__binary_pred, __n}, _ScanInputTransform{}, _WriteOp{__binary_pred, __n}, oneapi::dpl::unseq_backend::__no_init_value>{}, @@ -1362,18 +1364,19 @@ __parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backen #if _ONEDPL_COMPILE_KERNEL if (oneapi::dpl::__par_backend_hetero::__is_gpu_with_sg_32(__exec)) { - auto __opt_return = __handle_sync_sycl_exception( - [=, &__exec] { - using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_mask<_UnaryPredicate>; - using _WriteOp = - oneapi::dpl::__par_backend_hetero::__write_to_id_if_else; - return __parallel_reduce_then_scan_copy(__backend_tag, __exec, std::move(__rng), std::move(__result), - __n, _GenMask{__pred}, _WriteOp{}, - /*_IsUniquePattern=*/std::false_type{}); - }, - __bypass_sycl_kernel_not_supported{}); - if (__opt_return) - return __opt_return.value(); + try + { + using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_mask<_UnaryPredicate>; + using _WriteOp = + oneapi::dpl::__par_backend_hetero::__write_to_id_if_else; + return __parallel_reduce_then_scan_copy(__backend_tag, __exec, __rng, __result, __n, _GenMask{__pred}, + _WriteOp{}, + /*_IsUniquePattern=*/std::false_type{}); + } + catch (const sycl::exception& e) + { + __bypass_sycl_kernel_not_supported(e); + } } #endif using _ReduceOp = std::plus; @@ -1418,17 +1421,18 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, #if _ONEDPL_COMPILE_KERNEL else if (oneapi::dpl::__par_backend_hetero::__is_gpu_with_sg_32(__exec)) { - auto __opt_return = __handle_sync_sycl_exception( - [=, &__exec] { - using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_mask<_Pred>; - using _WriteOp = oneapi::dpl::__par_backend_hetero::__write_to_id_if<0, _Assign>; - return __parallel_reduce_then_scan_copy(__backend_tag, __exec, std::move(__in_rng), - std::move(__out_rng), __n, _GenMask{__pred}, _WriteOp{__assign}, - /*_IsUniquePattern=*/std::false_type{}); - }, - __bypass_sycl_kernel_not_supported{}); - if (__opt_return) - return __opt_return.value(); + try + { + using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_mask<_Pred>; + using _WriteOp = oneapi::dpl::__par_backend_hetero::__write_to_id_if<0, _Assign>; + return __parallel_reduce_then_scan_copy(__backend_tag, __exec, __in_rng, __out_rng, __n, _GenMask{__pred}, + _WriteOp{__assign}, + /*_IsUniquePattern=*/std::false_type{}); + } + catch (const sycl::exception& e) + { + __bypass_sycl_kernel_not_supported(e); + } } #endif using _ReduceOp = std::plus<_Size>; @@ -1466,7 +1470,7 @@ __parallel_set_reduce_then_scan(oneapi::dpl::__internal::__device_backend_tag __ return __parallel_transform_reduce_then_scan( __backend_tag, std::forward<_ExecutionPolicy>(__exec), - oneapi::dpl::__ranges::make_zip_view( + oneapi::dpl::__ranges::zip_view( std::forward<_Range1>(__rng1), std::forward<_Range2>(__rng2), oneapi::dpl::__ranges::all_view( __mask_buf.get_buffer())), @@ -1537,14 +1541,15 @@ __parallel_set_op(oneapi::dpl::__internal::__device_backend_tag __backend_tag, c #if _ONEDPL_COMPILE_KERNEL if (oneapi::dpl::__par_backend_hetero::__is_gpu_with_sg_32(__exec)) { - auto __opt_return = __handle_sync_sycl_exception( - [=, &__exec] { - return __parallel_set_reduce_then_scan(__backend_tag, __exec, std::move(__rng1), std::move(__rng2), - std::move(__result), __comp, __is_op_difference); - }, - __bypass_sycl_kernel_not_supported{}); - if (__opt_return) - return __opt_return.value(); + try + { + return __parallel_set_reduce_then_scan(__backend_tag, __exec, __rng1, __rng2, __result, __comp, + __is_op_difference); + } + catch (const sycl::exception& e) + { + __bypass_sycl_kernel_not_supported(e); + } } #endif return __parallel_set_scan(__backend_tag, __exec, std::forward<_Range1>(__rng1), std::forward<_Range2>(__rng2), @@ -2481,19 +2486,20 @@ __parallel_reduce_by_segment(oneapi::dpl::__internal::__device_backend_tag, cons { if (oneapi::dpl::__par_backend_hetero::__is_gpu_with_sg_32(__exec)) { - auto __opt_return = __handle_sync_sycl_exception( - [=, &__exec] { - auto __res = oneapi::dpl::__par_backend_hetero::__parallel_reduce_by_segment_reduce_then_scan( - oneapi::dpl::__internal::__device_backend_tag{}, __exec, std::move(__keys), std::move(__values), - std::move(__out_keys), std::move(__out_values), __binary_pred, __binary_op); - __res.wait(); - // Because our init type ends up being tuple, return the first component which is the write index. Add 1 to return the - // past-the-end iterator pair of segmented reduction. - return std::get<0>(__res.get()) + 1; - }, - __bypass_sycl_kernel_not_supported{}); - if (__opt_return) - return __opt_return.value(); + try + { + auto __res = oneapi::dpl::__par_backend_hetero::__parallel_reduce_by_segment_reduce_then_scan( + oneapi::dpl::__internal::__device_backend_tag{}, __exec, __keys, __values, __out_keys, __out_values, + __binary_pred, __binary_op); + __res.wait(); + // Because our init type ends up being tuple, return the first component which is the write index. Add 1 to return the + // past-the-end iterator pair of segmented reduction. + return std::get<0>(__res.get()) + 1; + } + catch (const sycl::exception& e) + { + __bypass_sycl_kernel_not_supported(e); + } } } #endif diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h index 6dd08930e61..1775e834369 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h @@ -21,7 +21,6 @@ #include #include #include -#include #include "../../iterator_impl.h" @@ -846,39 +845,20 @@ class __static_monotonic_dispatcher<::std::integer_sequence<::std::uint16_t, _X, // This exception handler is intended to handle a software workaround by IGC for a hardware bug that // causes IGC to throw an exception for certain integrated graphics devices with -O0 compilation and // a required sub-group size of 32. -struct __bypass_sycl_kernel_not_supported +void +__bypass_sycl_kernel_not_supported(const sycl::exception& __e) { - void - operator()(const sycl::exception& __e) const - { - // The SYCL spec compliant solution would be to compare __e.code() and sycl::errc::kernel_not_supported - // and rethrow the encountered exception if the two do not compare equal. However, the icpx compiler currently - // returns a generic error code in violation of the SYCL spec which has a value of 7. If we are using the Intel - // compiler, then compare the value of the error code. Otherwise, assume the implementation is spec compliant. + // The SYCL spec compliant solution would be to compare __e.code() and sycl::errc::kernel_not_supported + // and rethrow the encountered exception if the two do not compare equal. However, the icpx compiler currently + // returns a generic error code in violation of the SYCL spec which has a value of 7. If we are using the Intel + // compiler, then compare the value of the error code. Otherwise, assume the implementation is spec compliant. #if _ONEDPL_SYCL_KERNEL_NOT_SUPPORTED_EXCEPTION_BROKEN - if (__e.code().value() != 7) - throw; + if (__e.code().value() != 7) + throw; #else // Generic SYCL compiler. Assume it is spec compliant. - if (__e.code() != sycl::errc::kernel_not_supported) - throw; + if (__e.code() != sycl::errc::kernel_not_supported) + throw; #endif - } -}; - -template -auto -__handle_sync_sycl_exception(_Callable __caller, _Handler __handler) -> std::optional -{ - try - { - return __caller(); - } - catch (const sycl::exception& __e) - { - // Handle the error and return an empty std::optional - __handler(__e); - return {}; - } } } // namespace __par_backend_hetero