From 65c93f4a9fea858a7c211d6a450d943e97edff9b Mon Sep 17 00:00:00 2001
From: Dmitry Razdoburdin <>
Date: Fri, 22 Nov 2024 06:27:13 -0800
Subject: [PATCH] validation fix

---
 plugin/sycl/common/linalg_op.h  | 22 +++++++++++++---------
 src/objective/regression_obj.cu |  3 +--
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/plugin/sycl/common/linalg_op.h b/plugin/sycl/common/linalg_op.h
index 078e63e99b4f..1439408093be 100644
--- a/plugin/sycl/common/linalg_op.h
+++ b/plugin/sycl/common/linalg_op.h
@@ -78,18 +78,22 @@ bool Validate(DeviceOrd device, TensorView<T, D> t, Fn&& fn) {
 
   int flag = 0;
   {
-    ::sycl::buffer<int, 1> buff(&flag, 1);
-    size_t size = xgboost::linalg::cend(t) - xgboost::linalg::cbegin(t);
+    ::sycl::buffer<int, 1> flag_buf(&flag, 1);
     qu->submit([&](::sycl::handler& cgh) {
-      auto reduction = ::sycl::reduction(buff, cgh, ::sycl::maximum<>());
-      cgh.parallel_for<>(::sycl::range<1>(size), reduction,
-                         [=](::sycl::id<1> pid, auto& max) {
-        const size_t i = pid[0];
-        auto it = xgboost::linalg::cbegin(t) + i;
-        max.combine(!const_cast<Fn&&>(fn)(*it));
+      auto flag_acc = flag_buf.get_access<::sycl::access::mode::write>(cgh);
+      cgh.parallel_for<>(::sycl::range<1>(t.Size()),
+                         [=](::sycl::id<1> pid) {
+        const size_t idx = pid[0];
+        const T& value = call(t, xgboost::linalg::UnravelIndex(idx, t.Shape()));
+        bool is_valid = const_cast<Fn&&>(fn)(value);
+        if (!is_valid) {
+          AtomicRef<int> flag_ref(flag_acc[0]);
+          flag_ref = 1;
+        }
       });
-    }).wait_and_throw();
+    });
   }
+  qu->wait_and_throw();
   return (flag == 0);
 }
 
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index 50523ba2fb5b..9106044fcded 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -121,7 +121,6 @@ class RegLossObj : public FitInterceptGlmLike {
     if (iter == 0) {
       ValidateLabel(info);
     }
-    size_t const ndata = preds.Size();
     out_gpair->SetDevice(ctx_->Device());
     auto device = ctx_->Device();
 
@@ -132,7 +131,7 @@
     additional_input_.HostVector().begin()[1] = is_null_weight;
 
     const size_t nthreads = ctx_->Threads();
-    bool on_device = device.IsCUDA();
+    bool on_device = !device.IsCPU();
     // On CPU we run the transformation each thread processing a contigious block of data
     // for better performance.
     const size_t n_data_blocks = std::max(static_cast<size_t>(1), (on_device ? ndata : nthreads));
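
The linalg_op.h hunk above swaps the reduction-based validity check for a shared flag raised with an atomic write from whichever work-item sees an invalid value. Below is a minimal, self-contained sketch of that same pattern, assuming a SYCL 2020 toolchain; AllValid, the lambda predicate, and the float element type are illustrative names only and are not XGBoost identifiers.

#include <sycl/sycl.hpp>

#include <iostream>
#include <vector>

// Runs `is_valid` over every element of `data` on the device and raises a
// shared flag through an atomic write whenever an element fails the check.
template <typename Fn>
bool AllValid(::sycl::queue& qu, const std::vector<float>& data, Fn&& is_valid) {
  int flag = 0;
  {
    ::sycl::buffer<float, 1> data_buf(data.data(), ::sycl::range<1>(data.size()));
    ::sycl::buffer<int, 1> flag_buf(&flag, 1);
    qu.submit([&](::sycl::handler& cgh) {
      auto data_acc = data_buf.get_access<::sycl::access::mode::read>(cgh);
      auto flag_acc = flag_buf.get_access<::sycl::access::mode::write>(cgh);
      cgh.parallel_for(::sycl::range<1>(data.size()), [=](::sycl::id<1> pid) {
        if (!is_valid(data_acc[pid])) {
          // Several work-items may set the flag; only "was it ever set"
          // matters, so a relaxed atomic store is enough.
          ::sycl::atomic_ref<int, ::sycl::memory_order::relaxed,
                             ::sycl::memory_scope::device>
              flag_ref(flag_acc[0]);
          flag_ref.store(1);
        }
      });
    });
  }  // flag_buf is destroyed here and writes the device value back into `flag`
  qu.wait_and_throw();
  return flag == 0;
}

int main() {
  ::sycl::queue qu;
  std::vector<float> labels{0.0f, 0.5f, 1.0f};
  bool ok = AllValid(qu, labels, [](float v) { return v >= 0.0f && v <= 1.0f; });
  std::cout << std::boolalpha << ok << "\n";  // true: all labels are in [0, 1]
  return 0;
}

As in the patch, the host-side flag is read only after the buffer's scope ends, so the destructor has written the device value back, and wait_and_throw() surfaces any asynchronous errors before the result is trusted.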