From 2efde4e740b886a52d9b100915c4f98fe8df7583 Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin <> Date: Tue, 15 Oct 2024 04:42:51 -0700 Subject: [PATCH] remove data duplication in Ghist index --- plugin/sycl/common/hist_util.cc | 4 ++-- plugin/sycl/data/gradient_index.cc | 25 ++++++++----------------- plugin/sycl/data/gradient_index.h | 27 ++------------------------- 3 files changed, 12 insertions(+), 44 deletions(-) diff --git a/plugin/sycl/common/hist_util.cc b/plugin/sycl/common/hist_util.cc index 9f35429678bc..136036253592 100644 --- a/plugin/sycl/common/hist_util.cc +++ b/plugin/sycl/common/hist_util.cc @@ -130,7 +130,7 @@ ::sycl::event BuildHistKernel(::sycl::queue* qu, const size_t n_columns = isDense ? gmat.nfeatures : gmat.row_stride; const auto* pgh = gpair_device.DataConst(); const BinIdxType* gradient_index = gmat.index.data(); - const uint32_t* offsets = gmat.index.Offset(); + const uint32_t* offsets = gmat.cut.cut_ptrs_.ConstDevicePointer(); const size_t nbins = gmat.nbins; const size_t max_work_group_size = @@ -210,7 +210,7 @@ ::sycl::event BuildHistKernel(::sycl::queue* qu, const GradientPair::ValueT* pgh = reinterpret_cast(gpair_device.DataConst()); const BinIdxType* gradient_index = gmat.index.data(); - const uint32_t* offsets = gmat.index.Offset(); + const uint32_t* offsets = gmat.cut.cut_ptrs_.ConstDevicePointer(); FPType* hist_data = reinterpret_cast(hist->Data()); const size_t nbins = gmat.nbins; diff --git a/plugin/sycl/data/gradient_index.cc b/plugin/sycl/data/gradient_index.cc index 7476b789c4b2..0123fae149d4 100644 --- a/plugin/sycl/data/gradient_index.cc +++ b/plugin/sycl/data/gradient_index.cc @@ -48,13 +48,12 @@ void mergeSort(BinIdxType* begin, BinIdxType* end, BinIdxType* buf) { } } -template +template void GHistIndexMatrix::SetIndexData(::sycl::queue* qu, BinIdxType* index_data, DMatrix *dmat, size_t nbins, - size_t row_stride, - const uint32_t* offsets) { + size_t row_stride) { if (nbins == 0) return; const bst_float* cut_values = cut.cut_values_.ConstDevicePointer(); const uint32_t* cut_ptrs = cut.cut_ptrs_.ConstDevicePointer(); @@ -80,11 +79,11 @@ void GHistIndexMatrix::SetIndexData(::sycl::queue* qu, const size_t start = (i + base_rowid) * row_stride; for (bst_uint j = 0; j < size; ++j) { uint32_t idx = SearchBin(cut_values, cut_ptrs, data_ptr[ibegin + j]); - index_data[start + j] = offsets ? idx - offsets[j] : idx; + index_data[start + j] = isDense ? idx - cut_ptrs[j] : idx; AtomicRef hit_count_ref(hit_count_ptr[idx]); hit_count_ref.fetch_add(1); } - if (!offsets) { + if constexpr (!isDense) { // Sparse case only mergeSort(index_data + start, index_data + start + size, sort_data + start); for (bst_uint j = size; j < row_stride; ++j) { @@ -153,30 +152,22 @@ void GHistIndexMatrix::Init(::sycl::queue* qu, CHECK_GT(cut.cut_values_.Size(), 0U); - uint32_t* offsets = nullptr; - if (isDense) { - index.ResizeOffset(n_offsets); - offsets = index.Offset(); - qu->memcpy(offsets, cut.cut_ptrs_.ConstDevicePointer(), - sizeof(uint32_t) * n_offsets).wait_and_throw(); - } - if (isDense) { BinTypeSize curent_bin_size = index.GetBinTypeSize(); if (curent_bin_size == BinTypeSize::kUint8BinsTypeSize) { - SetIndexData(qu, index.data(), dmat, nbins, row_stride, offsets); + SetIndexData(qu, index.data(), dmat, nbins, row_stride); } else if (curent_bin_size == BinTypeSize::kUint16BinsTypeSize) { - SetIndexData(qu, index.data(), dmat, nbins, row_stride, offsets); + SetIndexData(qu, index.data(), dmat, nbins, row_stride); } else { CHECK_EQ(curent_bin_size, BinTypeSize::kUint32BinsTypeSize); - SetIndexData(qu, index.data(), dmat, nbins, row_stride, offsets); + SetIndexData(qu, index.data(), dmat, nbins, row_stride); } /* For sparse DMatrix we have to store index of feature for each bin in index field to chose right offset. So offset is nullptr and index is not reduced */ } else { sort_buff.Resize(qu, n_rows * row_stride * sizeof(uint32_t)); - SetIndexData(qu, index.data(), dmat, nbins, row_stride, offsets); + SetIndexData(qu, index.data(), dmat, nbins, row_stride); } } diff --git a/plugin/sycl/data/gradient_index.h b/plugin/sycl/data/gradient_index.h index 298f989babcd..7f9cf0bb4a44 100644 --- a/plugin/sycl/data/gradient_index.h +++ b/plugin/sycl/data/gradient_index.h @@ -29,13 +29,6 @@ struct Index { Index& operator=(Index i) = delete; Index(Index&& i) = delete; Index& operator=(Index&& i) = delete; - uint32_t operator[](size_t i) const { - if (!offset_.Empty()) { - return func_(data_.DataConst(), i) + offset_[i%p_]; - } else { - return func_(data_.DataConst(), i); - } - } void SetBinTypeSize(BinTypeSize binTypeSize) { binTypeSize_ = binTypeSize; switch (binTypeSize) { @@ -68,14 +61,6 @@ struct Index { return reinterpret_cast(data_.DataConst()); } - uint32_t* Offset() { - return offset_.Data(); - } - - const uint32_t* Offset() const { - return offset_.DataConst(); - } - size_t Size() const { return data_.Size() / (binTypeSize_); } @@ -84,11 +69,6 @@ struct Index { data_.Resize(qu_, nBytesData); } - void ResizeOffset(const size_t nDisps) { - offset_.Resize(qu_, nDisps); - p_ = nDisps; - } - uint8_t* begin() const { return data_.Begin(); } @@ -115,10 +95,7 @@ struct Index { using Func = uint32_t (*)(const uint8_t*, size_t); USMVector data_; - // size of this field is equal to number of features - USMVector offset_; BinTypeSize binTypeSize_ {BinTypeSize::kUint8BinsTypeSize}; - size_t p_ {1}; Func func_; ::sycl::queue* qu_; @@ -149,10 +126,10 @@ struct GHistIndexMatrix { void Init(::sycl::queue* qu, Context const * ctx, DMatrix *dmat, int max_num_bins); - template + template void SetIndexData(::sycl::queue* qu, BinIdxType* index_data, DMatrix *dmat, - size_t nbins, size_t row_stride, const uint32_t* offsets); + size_t nbins, size_t row_stride); void ResizeIndex(size_t n_index, bool isDense);