diff --git a/src/common/hist_util.cuh b/src/common/hist_util.cuh index cf1043ddb399..b95d9c5c1917 100644 --- a/src/common/hist_util.cuh +++ b/src/common/hist_util.cuh @@ -295,10 +295,47 @@ void ProcessSlidingWindow(AdapterBatch const &batch, MetaInfo const &info, &cuts_ptr, &column_sizes_scan, &sorted_entries); + + auto cuts_ptr_h = cuts_ptr.HostVector(); + std::cout << "cuts ptr ------------------ num_cuts: " << num_cuts << std::endl; + for(uint64_t i : cuts_ptr_h) { + std::cout << i << " " << std::endl; + } + std::cout << "cuts ptr -- end----------------" << std::endl; + + thrust::host_vector column_sizes_scan_h = column_sizes_scan; + std::cout << "The column_sizes_scan_h : " << std::endl; + for (int i = 0; i < column_sizes_scan_h.size(); i++) { + std::cout << "column_sizes_scan_h: " << column_sizes_scan_h[i] << " "; + } + std::cout << "----------------------------------" << std::endl; + + thrust::host_vector sorted_entries_h = sorted_entries; + std::cout << "++++++++ The Entry without sorting: ++++++++++ " << std::endl; + for (int i = 0; i < sorted_entries_h.size(); i++) { + if (i % 4 == 0) { + std::cout << std::endl; + } + std::cout << sorted_entries_h[i].fvalue << " "; + + } + + dh::XGBDeviceAllocator alloc; thrust::sort(thrust::cuda::par(alloc), sorted_entries.begin(), sorted_entries.end(), detail::EntryCompareOp()); + thrust::host_vector sorted_entries_h_again = sorted_entries; + std::cout << std::endl; + std::cout << "------------The Entry with sorting: -----------------" << std::endl; + std::cout << "------------The Entry with sorting: -----------------" << std::endl; + for (int i = 0; i < sorted_entries_h_again.size(); i++) { + std::cout << sorted_entries_h_again[i].fvalue << " "; + if (i % 150 == 0 && i != 0) { + std::cout << std::endl; + } + } + std::cout << std::endl; if (sketch_container->HasCategorical()) { auto d_cuts_ptr = cuts_ptr.DeviceSpan(); detail::RemoveDuplicatedCategories(device, info, d_cuts_ptr, &sorted_entries, nullptr, @@ -311,6 +348,10 @@ void ProcessSlidingWindow(AdapterBatch const &batch, MetaInfo const &info, sketch_container->Push(dh::ToSpan(sorted_entries), dh::ToSpan(column_sizes_scan), d_cuts_ptr, h_cuts_ptr.back()); + + std::cout << std::endl; + std::cout << "-----------------the sketch_container ----------------- " << std::endl; + sorted_entries.clear(); sorted_entries.shrink_to_fit(); } @@ -414,6 +455,8 @@ void AdapterDeviceSketch(Batch batch, int num_bins, size_t num_rows = batch.NumRows(); size_t num_cols = batch.NumCols(); size_t num_cuts_per_feature = detail::RequiredSampleCutsPerColumn(num_bins, num_rows); + std::cout << "AdapterDeviceSketch num_rows: " << num_rows << " num_cols: " << num_cols << + " num_cuts_per_feature: " << num_cuts_per_feature << std::endl; auto device = sketch_container->DeviceIdx(); bool weighted = !info.weights_.Empty(); diff --git a/src/common/hist_util.h b/src/common/hist_util.h index e829752dae3d..d9bbc994f02f 100644 --- a/src/common/hist_util.h +++ b/src/common/hist_util.h @@ -65,6 +65,27 @@ class HistogramCuts { // storing minimum value in a sketch set. HostDeviceVector min_vals_; // NOLINT + void show() { + auto cut_values_h = cut_values_.HostVector(); + std::cout << "HistogramCuts, cut_values_h: " << std::endl; + for (auto v : cut_values_h) { + std::cout << v << " "; + } + std::cout << std::endl; + auto cut_ptrs_h = cut_ptrs_.HostVector(); + std::cout << "cut_ptrs_: " << std::endl; + for (auto v : cut_ptrs_h) { + std::cout << v << " "; + } + std::cout << std::endl; + auto min_vals_h = min_vals_.HostVector(); + std::cout << "min_vals_h: " << std::endl; + for (auto v : min_vals_h) { + std::cout << v << " "; + } + std::cout << std::endl; + } + HistogramCuts(); HistogramCuts(HistogramCuts const& that) { this->Copy(that); } diff --git a/src/common/quantile.cu b/src/common/quantile.cu index e7f09fc4d56a..abb45f835841 100644 --- a/src/common/quantile.cu +++ b/src/common/quantile.cu @@ -310,6 +310,7 @@ void SketchContainer::Push(Span entries, Span columns_ptr, Span out; dh::device_vector cuts; bool first_window = this->Current().empty(); + std::cout << "SketchContainer::Push " << total_cuts << std::endl; if (!first_window) { cuts.resize(total_cuts); out = dh::ToSpan(cuts); @@ -413,6 +414,7 @@ void SketchContainer::Prune(size_t to) { auto const& h_feature_types = feature_types_.ConstHostSpan(); for (bst_feature_t i = 0; i < num_columns_; ++i) { size_t length = this->Column(i).size(); + std::cout << "column: " << i << " SketchContainer Prune len: " << length << " maxBins: " << to << std::endl; length = std::min(length, to); if (IsCat(h_feature_types, i)) { length = this->Column(i).size(); @@ -588,6 +590,7 @@ void SketchContainer::MakeCuts(Context const* ctx, HistogramCuts* p_cuts, bool i // Sync between workers. this->AllReduce(ctx, is_column_split); + std::cout << "MakeCuts prune to " << num_bins_ + 1 << std::endl; // Prune to final number of bins. this->Prune(num_bins_ + 1); this->FixError(); diff --git a/src/common/quantile.cuh b/src/common/quantile.cuh index 898da03a0dce..983354433ad1 100644 --- a/src/common/quantile.cuh +++ b/src/common/quantile.cuh @@ -4,6 +4,7 @@ #ifndef XGBOOST_COMMON_QUANTILE_CUH_ #define XGBOOST_COMMON_QUANTILE_CUH_ +#include #include "xgboost/span.h" #include "xgboost/data.h" #include "device_helpers.cuh" @@ -87,6 +88,26 @@ class SketchContainer { } public: + void show() { + std::cout << "SketchContainer, rows: " << this->num_rows_ + << "cols: " << this->num_columns_ + << std::endl; + + thrust::host_vector entry_h = this->Current(); + std::cout << "entries in SketchContainer!" << std::endl; + for (auto v : entry_h) { + std::cout << v << std::endl; + } + std::cout << std::endl; + + auto columns_ptr_h = columns_ptr_.HostVector(); + + std::cout << "columns_ptr in SketchContainer!" << std::endl; + for (auto v: columns_ptr_h) { + std::cout << v << " "; + } + std::cout << std::endl; + } /* \breif GPU quantile structure, with sketch data for each columns. * * \param max_bin Maximum number of bins per columns diff --git a/src/data/ellpack_page.cuh b/src/data/ellpack_page.cuh index c64462082481..47a537454222 100644 --- a/src/data/ellpack_page.cuh +++ b/src/data/ellpack_page.cuh @@ -210,6 +210,10 @@ class EllpackPageImpl { [[nodiscard]] EllpackDeviceAccessor GetHostAccessor( common::Span feature_types = {}) const; + void show() { + std::cout << "ELLpack is_dense: " << is_dense << " row_stride: " << row_stride + << " base_rowid: " << base_rowid << " n_rows: " << n_rows << std::endl; + } private: /*! * \brief Compress a single page of CSR data into ELLPACK. diff --git a/src/data/iterative_dmatrix.cc b/src/data/iterative_dmatrix.cc index 0d75d0651e26..d9c47b4d7f56 100644 --- a/src/data/iterative_dmatrix.cc +++ b/src/data/iterative_dmatrix.cc @@ -183,10 +183,14 @@ void IterativeDMatrix::InitFromCPU(Context const* ctx, BatchParam const& p, CHECK_EQ(n_features, num_cols()) << "Inconsistent number of columns."; } size_t batch_size = num_rows(); + batch_nnz.push_back(nnz_cnt()); nnz += batch_nnz.back(); accumulated_rows += batch_size; n_batches++; + + std::cout << "nnz: " << nnz << " batch_size: " << batch_size << std::endl; + } while (iter.Next()); iter.Reset(); @@ -199,6 +203,7 @@ void IterativeDMatrix::InitFromCPU(Context const* ctx, BatchParam const& p, })) << "Something went wrong during iteration."; CHECK_GE(n_features, 1) << "Data must has at least 1 column."; + std::cout << "Total nnz: " << nnz << " accumulated_rows: " << accumulated_rows << std::endl; /** * Generate quantiles diff --git a/src/data/iterative_dmatrix.cu b/src/data/iterative_dmatrix.cu index 09a3976d785c..bf100b1479f7 100644 --- a/src/data/iterative_dmatrix.cu +++ b/src/data/iterative_dmatrix.cu @@ -86,6 +86,8 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p, })); nnz += thrust::reduce(thrust::cuda::par(alloc), row_counts.begin(), row_counts.end()); batches++; + std::cout << "nnz: " << nnz << " batch_rows: " << batch_rows << + " accumulated_rows: " << accumulated_rows << std::endl; } while (iter.Next()); iter.Reset(); @@ -106,6 +108,7 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p, sketch_containers.shrink_to_fit(); final_sketch.MakeCuts(ctx, &cuts, this->info_.IsColumnSplit()); + final_sketch.show(); } else { GetCutsFromRef(ctx, ref, Info().num_col_, p, &cuts); } @@ -113,6 +116,8 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p, this->info_.num_row_ = accumulated_rows; this->info_.num_nonzero_ = nnz; + cuts.show(); + auto init_page = [this, &cuts, row_stride, accumulated_rows, get_device]() { if (!ellpack_) { // Should be put inside the while loop to protect against empty batch. In @@ -165,6 +170,8 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p, CHECK_EQ(proxy->Info().labels.Size(), 0); } + ellpack_->Impl()->show(); + iter.Reset(); // Synchronise worker columns info_.SynchronizeNumberOfColumns(ctx);