Commit

fix tests fails
Dmitry Razdoburdin committed Feb 7, 2024
1 parent 527af09 commit 54f577a
Showing 9 changed files with 73 additions and 53 deletions.
2 changes: 1 addition & 1 deletion include/xgboost/predictor.h
@@ -107,7 +107,7 @@ class Predictor {
*/
virtual void PredictBatch(DMatrix* dmat, PredictionCacheEntry* out_preds,
const gbm::GBTreeModel& model, uint32_t tree_begin,
uint32_t tree_end = 0) const = 0;
uint32_t tree_end = 0, bool training = false) const = 0;

/**
* \brief Inplace prediction.
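Note: the hunk above widens the PredictBatch interface with a defaulted `training` flag so callers in the training loop can identify themselves. The sketch below (stand-in types, illustrative only, not part of this commit) shows why the default keeps existing call sites source-compatible while letting training-aware callers opt in.

    #include <cstdint>
    #include <iostream>

    struct DMatrix {};               // stand-ins for the real xgboost types,
    struct PredictionCacheEntry {};  // for illustration only

    class PredictorLike {
     public:
      virtual ~PredictorLike() = default;
      // The extra parameter is defaulted, so callers that stop at tree_end still compile.
      virtual void PredictBatch(DMatrix* dmat, PredictionCacheEntry* out_preds,
                                std::uint32_t tree_begin, std::uint32_t tree_end = 0,
                                bool training = false) const = 0;
    };

    class ToyPredictor : public PredictorLike {
     public:
      void PredictBatch(DMatrix*, PredictionCacheEntry*, std::uint32_t,
                        std::uint32_t = 0, bool training = false) const override {
        std::cout << (training ? "training batch\n" : "inference batch\n");
      }
    };

    int main() {
      DMatrix dm;
      PredictionCacheEntry out;
      ToyPredictor p;
      p.PredictBatch(&dm, &out, 0, 1);                     // old-style call: training stays false
      p.PredictBatch(&dm, &out, 0, 1, /*training=*/true);  // the training loop opts in
    }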
28 changes: 14 additions & 14 deletions plugin/sycl/common/hist_util.h
@@ -6,6 +6,7 @@
#define PLUGIN_SYCL_COMMON_HIST_UTIL_H_

#include <vector>
#include <unordered_map>

#include "../data.h"
#include "row_set.h"
@@ -62,11 +63,11 @@ class HistCollection {

// Access histogram for i-th node
GHistRowT& operator[](bst_uint nid) {
return data_[nid];
return *(data_.at(nid));
}

const GHistRowT& operator[](bst_uint nid) const {
return data_[nid];
return *(data_.at(nid));
}

// Initialize histogram collection
@@ -78,20 +79,19 @@
}
}

// Reserve the space for hist rows
void Reserve(bst_uint max_nid) {
data_.reserve(max_nid + 1);
}

// Create an empty histogram for i-th node
::sycl::event AddHistRow(bst_uint nid) {
if (nid >= data_.size()) {
data_.resize(nid + 1);
}
::sycl::event event;
data_[nid].Resize(&qu_, nbins_,
xgboost::detail::GradientPairInternal<GradientSumT>(0, 0),
&event);
if (data_.count(nid) == 0) {
data_[nid] =
std::make_shared<GHistRowT>(&qu_, nbins_,
xgboost::detail::GradientPairInternal<GradientSumT>(0, 0),
&event);
} else {
data_[nid]->Resize(&qu_, nbins_,
xgboost::detail::GradientPairInternal<GradientSumT>(0, 0),
&event);
}
return event;
}

@@ -103,7 +103,7 @@
/*! \brief Number of all bins over all features */
uint32_t nbins_ = 0;

std::vector<GHistRowT> data_;
std::unordered_map<uint32_t, std::shared_ptr<GHistRowT>> data_;

::sycl::queue qu_;
};
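Note: a minimal sketch of the storage pattern this file switches to — histogram rows keyed by node id in an unordered_map of shared_ptr, created lazily and resized in place. HistRow below is a stand-in for GHistRowT, not the real SYCL-backed type; one likely benefit of the new layout is that adding rows for new nodes never relocates rows that are already referenced, unlike resizing the previous std::vector.

    #include <cstdint>
    #include <memory>
    #include <unordered_map>
    #include <vector>

    // Illustrative stand-in for a histogram row: a resizable buffer of bins.
    struct HistRow {
      explicit HistRow(std::size_t nbins) : bins(nbins, 0.0) {}
      void Reset(std::size_t nbins) { bins.assign(nbins, 0.0); }
      std::vector<double> bins;
    };

    class HistCollectionSketch {
     public:
      explicit HistCollectionSketch(std::uint32_t nbins) : nbins_(nbins) {}

      // Create the row for node `nid` on first use, or clear it if it already
      // exists -- the same create-or-resize branch as AddHistRow above.
      HistRow& AddHistRow(std::uint32_t nid) {
        auto it = data_.find(nid);
        if (it == data_.end()) {
          it = data_.emplace(nid, std::make_shared<HistRow>(nbins_)).first;
        } else {
          it->second->Reset(nbins_);
        }
        return *it->second;
      }

      // Rows live behind shared_ptr, so this reference remains valid even after
      // rows for other nodes are added later.
      HistRow& operator[](std::uint32_t nid) { return *data_.at(nid); }

     private:
      std::uint32_t nbins_;
      std::unordered_map<std::uint32_t, std::shared_ptr<HistRow>> data_;
    };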
33 changes: 24 additions & 9 deletions plugin/sycl/data.h
@@ -82,6 +82,12 @@ class USMVector {
qu->fill(data_.get(), v, size_).wait();
}

USMVector(::sycl::queue* qu, size_t size, T v,
::sycl::event* event) : size_(size), capacity_(size) {
data_ = allocate_memory_(qu, size_);
*event = qu->fill(data_.get(), v, size_, *event);
}

USMVector(::sycl::queue* qu, const std::vector<T> &vec) {
size_ = vec.size();
capacity_ = size_;
@@ -92,12 +98,9 @@
~USMVector() {
}

USMVector<T>& operator=(const USMVector<T>& other) {
size_ = other.size_;
capacity_ = other.capacity_;
data_ = other.data_;
return *this;
}
USMVector(const USMVector&) = delete;

USMVector<T>& operator=(const USMVector<T>& other) = delete;

T* Data() { return data_.get(); }
const T* DataConst() const { return data_.get(); }
@@ -220,14 +223,26 @@ struct DeviceMatrix {

DeviceMatrix() = default;

void Init(::sycl::queue qu, DMatrix* dmat) {
if (p_mat == dmat) {
DeviceMatrix(const DeviceMatrix& other) = delete;

DeviceMatrix& operator= (const DeviceMatrix& other) = delete;

// During training the same dmatrix is used, so we don't need to reload it on the device
bool ReinitializationRequired(DMatrix* dmat, bool training) {
if (!training) return true;
if (p_mat != dmat) return true;
return false;
}

void Init(::sycl::queue qu, DMatrix* dmat, bool training = false) {
qu_ = qu;
if (!ReinitializationRequired(dmat, training)) {
is_from_cache = true;
return;
}

is_from_cache = false;
p_mat = dmat;
qu_ = qu;

size_t num_row = 0;
size_t num_nonzero = 0;
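Note: the caching rule added to DeviceMatrix, restated as a self-contained sketch (all names below are stand-ins). Reinitialization is skipped only when the call comes from training and the same DMatrix pointer is seen again; plain prediction always reloads. Deleting the copy operations on USMVector and DeviceMatrix additionally turns accidental copies of device-side buffers into compile-time errors.

    #include <iostream>

    struct DMatrixStub {};  // stand-in for xgboost::DMatrix

    class DeviceMatrixSketch {
     public:
      DeviceMatrixSketch() = default;
      // As in this hunk: the device matrix is not copyable.
      DeviceMatrixSketch(const DeviceMatrixSketch&) = delete;
      DeviceMatrixSketch& operator=(const DeviceMatrixSketch&) = delete;

      // During training the same matrix arrives on every iteration, so the
      // device copy can be reused; any other situation forces a reload.
      bool ReinitializationRequired(DMatrixStub* dmat, bool training) const {
        if (!training) return true;
        return p_mat_ != dmat;
      }

      void Init(DMatrixStub* dmat, bool training = false) {
        if (!ReinitializationRequired(dmat, training)) {
          std::cout << "reusing cached device copy\n";
          return;
        }
        std::cout << "uploading matrix to device\n";
        p_mat_ = dmat;
      }

     private:
      DMatrixStub* p_mat_ = nullptr;
    };

    int main() {
      DMatrixStub train_data;
      DeviceMatrixSketch dm;
      dm.Init(&train_data, /*training=*/true);   // first boosting iteration: upload
      dm.Init(&train_data, /*training=*/true);   // later iterations: reuse
      dm.Init(&train_data, /*training=*/false);  // plain prediction: always reload
    }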
5 changes: 2 additions & 3 deletions plugin/sycl/predictor/predictor.cc
@@ -287,11 +287,10 @@ class Predictor : public xgboost::Predictor {

void PredictBatch(DMatrix *dmat, PredictionCacheEntry *predts,
const gbm::GBTreeModel &model, uint32_t tree_begin,
uint32_t tree_end = 0) const override {
uint32_t tree_end = 0, bool training = false) const override {
::sycl::queue qu = device_manager.GetQueue(ctx_->Device());

predictor_monitor_.Start("InitDeviceMatrix");
device_matrix.Init(qu, dmat);
device_matrix.Init(qu, dmat, training);
predictor_monitor_.Stop("InitDeviceMatrix");

auto* out_preds = &predts->predictions;
14 changes: 12 additions & 2 deletions plugin/sycl/tree/split_evaluator.h
@@ -45,21 +45,31 @@ class TreeEvaluator {
bool has_constraint_;

public:
TreeEvaluator(::sycl::queue qu, xgboost::tree::TrainParam const& p, bst_feature_t n_features) {
void Reset(::sycl::queue qu, xgboost::tree::TrainParam const& p, bst_feature_t n_features) {
qu_ = qu;
if (p.monotone_constraints.empty()) {
monotone_.Resize(&qu_, n_features, 0);
has_constraint_ = false;
} else {
monotone_ = USMVector<int32_t>(&qu_, p.monotone_constraints);
// monotone_ = USMVector<int32_t>(&qu_, p.monotone_constraints);
// monotone_.Resize(&qu_, n_features, 0);

monotone_.Resize(&qu_, n_features, 0);
qu_.memcpy(monotone_.Data(), p.monotone_constraints.data(),
sizeof(int32_t) * p.monotone_constraints.size());
qu_.wait();

lower_bounds_.Resize(&qu_, p.MaxNodes(), -std::numeric_limits<GradType>::max());
upper_bounds_.Resize(&qu_, p.MaxNodes(), std::numeric_limits<GradType>::max());
has_constraint_ = true;
}
param_ = TrainParam(p);
}

TreeEvaluator(::sycl::queue qu, xgboost::tree::TrainParam const& p, bst_feature_t n_features) {
Reset(qu, p, n_features);
}

struct SplitEvaluator {
int* constraints;
GradType* lower;
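Note: the constructor body moves into Reset because the evaluator's members (USMVector) are no longer copyable, so rebuilding the evaluator by assignment, as updater_quantile_hist.cc used to do with `tree_evaluator_ = TreeEvaluator<GradientSumT>(...)`, stops compiling; the updater now calls `tree_evaluator_.Reset(...)` instead. A minimal sketch of the pattern, with stand-in types:

    #include <cstddef>
    #include <vector>

    // Stand-in for USMVector: owns a device buffer, so copying is disallowed.
    struct DeviceBuffer {
      DeviceBuffer() = default;
      DeviceBuffer(const DeviceBuffer&) = delete;
      DeviceBuffer& operator=(const DeviceBuffer&) = delete;
      void Resize(std::size_t n, int v) { host_shadow.assign(n, v); }
      std::vector<int> host_shadow;  // placeholder for real device memory
    };

    class EvaluatorSketch {
     public:
      // Re-initialize the existing members in place; no copy or move required.
      void Reset(std::size_t n_features) {
        monotone_.Resize(n_features, 0);
        has_constraint_ = false;
      }

      // The constructor simply delegates to Reset, as TreeEvaluator does here.
      explicit EvaluatorSketch(std::size_t n_features) { Reset(n_features); }

     private:
      DeviceBuffer monotone_;
      bool has_constraint_ = false;
    };

    int main() {
      EvaluatorSketch ev(16);
      // EvaluatorSketch copy = ev;  // would not compile: the member's copy is deleted
      ev.Reset(32);                  // instead, the same object is reset on each Update()
    }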
36 changes: 16 additions & 20 deletions plugin/sycl/tree/updater_quantile_hist.cc
@@ -146,12 +146,12 @@ void BatchHistSynchronizer<GradientSumT>::SyncHistograms(BuilderT *builder,
hist_sync_events_.resize(builder->nodes_for_explicit_hist_build_.size());
for (int i = 0; i < builder->nodes_for_explicit_hist_build_.size(); i++) {
const auto entry = builder->nodes_for_explicit_hist_build_[i];
auto this_hist = builder->hist_[entry.nid];
auto& this_hist = builder->hist_[entry.nid];

if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) {
const size_t parent_id = (*p_tree)[entry.nid].Parent();
auto parent_hist = builder->hist_[parent_id];
auto sibling_hist = builder->hist_[entry.sibling_nid];
auto& parent_hist = builder->hist_[parent_id];
auto& sibling_hist = builder->hist_[entry.sibling_nid];
hist_sync_events_[i] = common::SubtractionHist(builder->qu_, &sibling_hist, parent_hist,
this_hist, nbins, ::sycl::event());
}
Expand All @@ -169,19 +169,19 @@ void DistributedHistSynchronizer<GradientSumT>::SyncHistograms(BuilderT* builder
const size_t nbins = builder->hist_builder_.GetNumBins();
for (int node = 0; node < builder->nodes_for_explicit_hist_build_.size(); node++) {
const auto entry = builder->nodes_for_explicit_hist_build_[node];
auto this_hist = builder->hist_[entry.nid];
auto& this_hist = builder->hist_[entry.nid];
// Store possible parent node
auto this_local = builder->hist_local_worker_[entry.nid];
auto& this_local = builder->hist_local_worker_[entry.nid];
common::CopyHist(builder->qu_, &this_local, this_hist, nbins);

if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) {
const size_t parent_id = (*p_tree)[entry.nid].Parent();
auto parent_hist = builder->hist_local_worker_[parent_id];
auto sibling_hist = builder->hist_[entry.sibling_nid];
auto& parent_hist = builder->hist_local_worker_[parent_id];
auto& sibling_hist = builder->hist_[entry.sibling_nid];
common::SubtractionHist(builder->qu_, &sibling_hist, parent_hist,
this_hist, nbins, ::sycl::event());
// Store possible parent node
auto sibling_local = builder->hist_local_worker_[entry.sibling_nid];
auto& sibling_local = builder->hist_local_worker_[entry.sibling_nid];
common::CopyHist(builder->qu_, &sibling_local, sibling_hist, nbins);
}
}
@@ -202,11 +202,11 @@ void DistributedHistSynchronizer<GradientSumT>::ParallelSubtractionHist(
for (int node = 0; node < nodes.size(); node++) {
const auto entry = nodes[node];
if (!((*p_tree)[entry.nid].IsLeftChild())) {
auto this_hist = builder->hist_[entry.nid];
auto& this_hist = builder->hist_[entry.nid];

if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) {
auto parent_hist = builder->hist_[(*p_tree)[entry.nid].Parent()];
auto sibling_hist = builder->hist_[entry.sibling_nid];
auto& parent_hist = builder->hist_[(*p_tree)[entry.nid].Parent()];
auto& sibling_hist = builder->hist_[entry.sibling_nid];
common::SubtractionHist(builder->qu_, &this_hist, parent_hist,
sibling_hist, nbins, ::sycl::event());
}
@@ -219,7 +219,7 @@ void QuantileHistMaker::Builder<GradientSumT>::ReduceHists(const std::vector<int
size_t nbins) {
std::vector<GradientPairT> reduce_buffer(sync_ids.size() * nbins);
for (size_t i = 0; i < sync_ids.size(); i++) {
auto this_hist = hist_[sync_ids[i]];
auto& this_hist = hist_[sync_ids[i]];
const GradientPairT* psrc = reinterpret_cast<const GradientPairT*>(this_hist.DataConst());
std::copy(psrc, psrc + nbins, reduce_buffer.begin() + i * nbins);
}
@@ -228,7 +228,7 @@ void QuantileHistMaker::Builder<GradientSumT>::ReduceHists(const std::vector<int
2 * nbins * sync_ids.size());
// histred_.Allreduce(reduce_buffer.data(), nbins * sync_ids.size());
for (size_t i = 0; i < sync_ids.size(); i++) {
auto this_hist = hist_[sync_ids[i]];
auto& this_hist = hist_[sync_ids[i]];
GradientPairT* psrc = reinterpret_cast<GradientPairT*>(this_hist.Data());
std::copy(reduce_buffer.begin() + i * nbins, reduce_buffer.begin() + (i + 1) * nbins, psrc);
}
@@ -249,7 +249,6 @@ void BatchHistRowsAdder<GradientSumT>::AddHistRows(BuilderT *builder,
max_nid = node.nid > max_nid ? node.nid : max_nid;
}

builder->hist_.Reserve(max_nid);
for (auto const& entry : builder->nodes_for_explicit_hist_build_) {
int nid = entry.nid;
auto event = builder->hist_.AddHistRow(nid);
@@ -611,7 +610,7 @@ void QuantileHistMaker::Builder<GradientSumT>::Update(
builder_monitor_.Start("Update");

const std::vector<GradientPair>& gpair_h = gpair->ConstHostVector();
tree_evaluator_ = TreeEvaluator<GradientSumT>(qu_, param_, p_fmat->Info().num_col_);
tree_evaluator_.Reset(qu_, param_, p_fmat->Info().num_col_);
interaction_constraints_.Reset();

this->InitData(ctx, gmat, gpair_h, gpair_device, *p_fmat, *p_tree);
@@ -847,10 +846,7 @@ void QuantileHistMaker::Builder<GradientSumT>::InitData(
}

// initialize histogram builder
#pragma omp parallel
{
this->nthread_ = omp_get_num_threads();
}
this->nthread_ = omp_get_num_threads();
hist_builder_ = GHistBuilder<GradientSumT>(qu_, nbins);

USMVector<size_t, MemoryType::on_device>* row_indices = &(row_set_collection_.Data());
@@ -1284,7 +1280,7 @@ void QuantileHistMaker::Builder<GradientSumT>::InitNewNode(int nid,
}

{
auto hist = hist_[nid];
auto& hist = hist_[nid];
GradientPairT grad_stat;
if (tree[nid].IsRoot()) {
if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {
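Note: the repeated `auto` to `auto&` edits in this file follow from the histogram rows becoming non-copyable: `hist_[nid]` returns a reference, and binding it to a plain `auto` makes a copy of the row, which no longer compiles now that USMVector's copy operations are deleted. A tiny sketch of the distinction with a stand-in row type:

    #include <memory>
    #include <unordered_map>

    struct HistRowStub {
      HistRowStub() = default;
      HistRowStub(const HistRowStub&) = delete;             // like USMVector after this commit
      HistRowStub& operator=(const HistRowStub&) = delete;
      int* device_ptr = nullptr;
    };

    int main() {
      std::unordered_map<unsigned, std::shared_ptr<HistRowStub>> hist;
      hist[0] = std::make_shared<HistRowStub>();

      // auto row = *hist.at(0);   // would not compile: copies a non-copyable row
      auto& row   = *hist.at(0);   // binds a reference to the stored row; no copy
      row.device_ptr = nullptr;    // mutates the row owned by the collection
    }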
4 changes: 2 additions & 2 deletions src/gbm/gbtree.cc
@@ -514,7 +514,7 @@ void GBTree::PredictBatchImpl(DMatrix* p_fmat, PredictionCacheEntry* out_preds,
auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
if (tree_end > tree_begin) {
predictor->PredictBatch(p_fmat, out_preds, model_, tree_begin, tree_end);
predictor->PredictBatch(p_fmat, out_preds, model_, tree_begin, tree_end, is_training);
}
if (reset) {
out_preds->version = 0;
@@ -763,7 +763,7 @@ class Dart : public GBTree {
auto version = i / layer_trees();
p_out_preds->version = version;
predts.predictions.Fill(0);
predictor->PredictBatch(p_fmat, &predts, model_, i, i + 1);
predictor->PredictBatch(p_fmat, &predts, model_, i, i + 1, training);

// Multiply the weight into the output prediction.
auto w = this->weight_drop_.at(i);
2 changes: 1 addition & 1 deletion src/predictor/cpu_predictor.cc
@@ -699,7 +699,7 @@ class CPUPredictor : public Predictor {
explicit CPUPredictor(Context const *ctx) : Predictor::Predictor{ctx} {}

void PredictBatch(DMatrix *dmat, PredictionCacheEntry *predts, const gbm::GBTreeModel &model,
uint32_t tree_begin, uint32_t tree_end = 0) const override {
uint32_t tree_begin, uint32_t tree_end = 0, bool training = false) const override {
auto *out_preds = &predts->predictions;
// This is actually already handled in gbm, but large amount of tests rely on the
// behaviour.
2 changes: 1 addition & 1 deletion src/predictor/gpu_predictor.cu
@@ -928,7 +928,7 @@ class GPUPredictor : public xgboost::Predictor {

void PredictBatch(DMatrix* dmat, PredictionCacheEntry* predts,
const gbm::GBTreeModel& model, uint32_t tree_begin,
uint32_t tree_end = 0) const override {
uint32_t tree_end = 0, bool training = false) const override {
int device = ctx_->gpu_id;
CHECK_GE(device, 0) << "Set `gpu_id' to positive value for processing GPU data.";
auto* out_preds = &predts->predictions;

