From 65c93f4a9fea858a7c211d6a450d943e97edff9b Mon Sep 17 00:00:00 2001
From: Dmitry Razdoburdin <>
Date: Fri, 22 Nov 2024 06:27:13 -0800
Subject: [PATCH] validation fix

---
 plugin/sycl/common/linalg_op.h  | 22 +++++++++++++---------
 src/objective/regression_obj.cu |  3 +--
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/plugin/sycl/common/linalg_op.h b/plugin/sycl/common/linalg_op.h
index 078e63e99b4f..1439408093be 100644
--- a/plugin/sycl/common/linalg_op.h
+++ b/plugin/sycl/common/linalg_op.h
@@ -78,18 +78,22 @@ bool Validate(DeviceOrd device, TensorView<T, D> t, Fn&& fn) {
 
   int flag = 0;
   {
-    ::sycl::buffer<int, 1> buff(&flag, 1);
-    size_t size = xgboost::linalg::cend(t) - xgboost::linalg::cbegin(t);
+    ::sycl::buffer<int, 1> flag_buf(&flag, 1);
     qu->submit([&](::sycl::handler& cgh) {
-      auto reduction = ::sycl::reduction(buff, cgh, ::sycl::maximum<>());
-      cgh.parallel_for<>(::sycl::range<1>(size), reduction,
-                         [=](::sycl::id<1> pid, auto& max) {
-        const size_t i = pid[0];
-        auto it = xgboost::linalg::cbegin(t) + i;
-        max.combine(!const_cast<Fn&&>(fn)(*it));
+      auto flag_acc = flag_buf.get_access<::sycl::access::mode::write>(cgh);
+      cgh.parallel_for<>(::sycl::range<1>(t.Size()),
+                         [=](::sycl::id<1> pid) {
+        const size_t idx = pid[0];
+        const T& value = call(t, xgboost::linalg::UnravelIndex(idx, t.Shape()));
+        bool is_valid = const_cast<Fn&&>(fn)(value);
+        if (!is_valid) {
+          AtomicRef<int> flag_ref(flag_acc[0]);
+          flag_ref = 1;
+        }
       });
-    }).wait_and_throw();
+    });
   }
+  qu->wait_and_throw();
   return (flag == 0);
 }
 
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index 50523ba2fb5b..9106044fcded 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -121,7 +121,6 @@ class RegLossObj : public FitInterceptGlmLike {
     if (iter == 0) {
       ValidateLabel(info);
     }
-    size_t const ndata = preds.Size();
     out_gpair->SetDevice(ctx_->Device());
     auto device = ctx_->Device();
 
@@ -132,7 +131,7 @@
     additional_input_.HostVector().begin()[1] = is_null_weight;
 
     const size_t nthreads = ctx_->Threads();
-    bool on_device = device.IsCUDA();
+    bool on_device = !device.IsCPU();
     // On CPU we run the transformation each thread processing a contigious block of data
     // for better performance.
     const size_t n_data_blocks = std::max(static_cast<size_t>(1), (on_device ? ndata : nthreads));
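
The linalg_op.h hunk above swaps the reduction-based validity check for a shared flag raised with an atomic write from whichever work-item sees an invalid value. Below is a minimal, self-contained sketch of that same pattern, assuming a SYCL 2020 toolchain; AllValid, the lambda predicate, and the float element type are illustrative names only and are not XGBoost identifiers.

#include <sycl/sycl.hpp>

#include <iostream>
#include <vector>

// Runs `is_valid` over every element of `data` on the device and raises a
// shared flag through an atomic write whenever an element fails the check.
template <typename Fn>
bool AllValid(::sycl::queue& qu, const std::vector<float>& data, Fn&& is_valid) {
  int flag = 0;
  {
    ::sycl::buffer<float, 1> data_buf(data.data(), ::sycl::range<1>(data.size()));
    ::sycl::buffer<int, 1> flag_buf(&flag, 1);
    qu.submit([&](::sycl::handler& cgh) {
      auto data_acc = data_buf.get_access<::sycl::access::mode::read>(cgh);
      auto flag_acc = flag_buf.get_access<::sycl::access::mode::write>(cgh);
      cgh.parallel_for(::sycl::range<1>(data.size()), [=](::sycl::id<1> pid) {
        if (!is_valid(data_acc[pid])) {
          // Several work-items may set the flag; only "was it ever set"
          // matters, so a relaxed atomic store is enough.
          ::sycl::atomic_ref<int, ::sycl::memory_order::relaxed,
                             ::sycl::memory_scope::device>
              flag_ref(flag_acc[0]);
          flag_ref.store(1);
        }
      });
    });
  }  // flag_buf is destroyed here and writes the device value back into `flag`
  qu.wait_and_throw();
  return flag == 0;
}

int main() {
  ::sycl::queue qu;
  std::vector<float> labels{0.0f, 0.5f, 1.0f};
  bool ok = AllValid(qu, labels, [](float v) { return v >= 0.0f && v <= 1.0f; });
  std::cout << std::boolalpha << ok << "\n";  // true: all labels are in [0, 1]
  return 0;
}

As in the patch, the host-side flag is read only after the buffer's scope ends, so the destructor has written the device value back, and wait_and_throw() surfaces any asynchronous errors before the result is trusted.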