diff --git a/src/data/ellpack_page.cuh b/src/data/ellpack_page.cuh
index 7a2ce40add9d..e85bb0970d26 100644
--- a/src/data/ellpack_page.cuh
+++ b/src/data/ellpack_page.cuh
@@ -141,6 +141,13 @@ struct EllpackDeviceAccessor {
 
 class GHistIndexMatrix;
 
+/**
+ * @brief This is either an Ellpack format matrix or a dense matrix.
+ *
+ * When no compression can be gained by using ellpack, we use this structure as a
+ * simple dense matrix. For a dense matrix, we can provide extra compression by counting
+ * the histogram bins for each feature instead of for the entire dataset.
+ */
 class EllpackPageImpl {
  public:
   /**
@@ -152,7 +159,7 @@ class EllpackPageImpl {
   EllpackPageImpl() = default;
 
   /**
-   * @brief Constructor from an existing EllpackInfo.
+   * @brief Constructor from existing ellpack matrices.
    *
    * This is used in the sampling case. The ELLPACK page is constructed from an existing
    * Ellpack page and the given number of rows.
diff --git a/src/data/ellpack_page.h b/src/data/ellpack_page.h
index bbd6db0eef7f..8e54855049a2 100644
--- a/src/data/ellpack_page.h
+++ b/src/data/ellpack_page.h
@@ -17,6 +17,8 @@ class EllpackPageImpl;
  *
  * This class uses the PImpl idiom (https://en.cppreference.com/w/cpp/language/pimpl) to avoid
  * including CUDA-specific implementation details in the header.
+ *
+ * See @ref EllpackPageImpl .
  */
 class EllpackPage {
  public:
diff --git a/tests/cpp/data/test_ellpack_page.cu b/tests/cpp/data/test_ellpack_page.cu
index 932d43ecdfa6..db843cd0564f 100644
--- a/tests/cpp/data/test_ellpack_page.cu
+++ b/tests/cpp/data/test_ellpack_page.cu
@@ -293,7 +293,9 @@ class CompressedDense : public ::testing::TestWithParam<std::size_t> {
 
   void CheckFromAdapter(std::size_t null_column) {
     bst_idx_t n_samples = 16, n_features = 8;
-    HostDeviceVector<float> data(n_samples * n_features);
+
+    auto ctx = MakeCUDACtx(0);
+    HostDeviceVector<float> data(n_samples * n_features, 0.0f, ctx.Device());
     auto& h_data = data.HostVector();
     std::iota(h_data.begin(), h_data.end(), 0.0f);
     for (std::size_t i = 0; i < h_data.size(); i += n_features) {
@@ -302,11 +304,11 @@ class CompressedDense : public ::testing::TestWithParam<std::size_t> {
     h_data[null_column] = null_column;  // Keep the first sample full.
     auto p_fmat = GetDMatrixFromData(h_data, n_samples, n_features);
 
+    data.ConstDeviceSpan();  // Pull to device
     auto arri = GetArrayInterface(&data, n_samples, n_features);
     auto sarri = Json::Dump(arri);
     data::CupyAdapter adapter{StringView{sarri}};
 
-    auto ctx = MakeCUDACtx(0);
     Context cpu_ctx;
     auto batch = BatchParam{static_cast<bst_bin_t>(p_fmat->Info().num_row_), 0.8};
 
@@ -317,11 +319,13 @@ class CompressedDense : public ::testing::TestWithParam<std::size_t> {
     dh::device_vector<bst_idx_t> row_counts(n_samples, n_features - 1);
     row_counts[0] = n_features;
     auto d_row_counts = dh::ToSpan(row_counts);
+    ASSERT_EQ(adapter.NumColumns(), n_features);
     auto impl =
         EllpackPageImpl{&ctx,       adapter.Value(), std::numeric_limits<float>::quiet_NaN(),
                         false,      d_row_counts,    {},
                         n_features, n_samples,       cuts};
     this->CheckBasic(&ctx, batch, null_column, impl);
+    dh::DefaultStream().Sync();
   }
 
   void CheckFromToGHist(std::size_t null_column) {