Skip to content

Commit

Permalink
Update & fix.
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed Oct 2, 2024
1 parent 4e1152c commit 5d7c22f
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 3 deletions.
9 changes: 8 additions & 1 deletion src/data/ellpack_page.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,13 @@ struct EllpackDeviceAccessor {

class GHistIndexMatrix;

/**
* @brief This is either an Ellpack format matrix or a dense matrix.
*
* When no compression can be achieved by using ellpack, we use this structure as a
* simple dense matrix. For a dense matrix, we can provide extra compression by counting
* the histogram bins for each feature instead of for the entire dataset.
*/
class EllpackPageImpl {
public:
/**
Expand All @@ -152,7 +159,7 @@ class EllpackPageImpl {
EllpackPageImpl() = default;

/**
* @brief Constructor from an existing EllpackInfo.
* @brief Constructor from existing ellpack matrices.
*
* This is used in the sampling case. The ELLPACK page is constructed from an existing
* Ellpack page and the given number of rows.
Expand Down
2 changes: 2 additions & 0 deletions src/data/ellpack_page.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ class EllpackPageImpl;
*
* This class uses the PImpl idiom (https://en.cppreference.com/w/cpp/language/pimpl) to avoid
* including CUDA-specific implementation details in the header.
*
* See @ref EllpackPageImpl .
*/
class EllpackPage {
public:
Expand Down
8 changes: 6 additions & 2 deletions tests/cpp/data/test_ellpack_page.cu
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,9 @@ class CompressedDense : public ::testing::TestWithParam<std::size_t> {

void CheckFromAdapter(std::size_t null_column) {
bst_idx_t n_samples = 16, n_features = 8;
HostDeviceVector<float> data(n_samples * n_features);

auto ctx = MakeCUDACtx(0);
HostDeviceVector<float> data(n_samples * n_features, 0.0f, ctx.Device());
auto& h_data = data.HostVector();
std::iota(h_data.begin(), h_data.end(), 0.0f);
for (std::size_t i = 0; i < h_data.size(); i += n_features) {
Expand All @@ -302,11 +304,11 @@ class CompressedDense : public ::testing::TestWithParam<std::size_t> {
h_data[null_column] = null_column; // Keep the first sample full.
auto p_fmat = GetDMatrixFromData(h_data, n_samples, n_features);

data.ConstDeviceSpan(); // Pull to device
auto arri = GetArrayInterface(&data, n_samples, n_features);
auto sarri = Json::Dump(arri);
data::CupyAdapter adapter{StringView{sarri}};

auto ctx = MakeCUDACtx(0);
Context cpu_ctx;
auto batch = BatchParam{static_cast<bst_bin_t>(p_fmat->Info().num_row_), 0.8};

Expand All @@ -317,11 +319,13 @@ class CompressedDense : public ::testing::TestWithParam<std::size_t> {
dh::device_vector<bst_idx_t> row_counts(n_samples, n_features - 1);
row_counts[0] = n_features;
auto d_row_counts = dh::ToSpan(row_counts);
ASSERT_EQ(adapter.NumColumns(), n_features);
auto impl =
EllpackPageImpl{&ctx, adapter.Value(), std::numeric_limits<float>::quiet_NaN(),
false, d_row_counts, {},
n_features, n_samples, cuts};
this->CheckBasic(&ctx, batch, null_column, impl);
dh::DefaultStream().Sync();
}

void CheckFromToGHist(std::size_t null_column) {
Expand Down

0 comments on commit 5d7c22f

Please sign in to comment.