diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 8a48126e195..a5b248135c1 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -40,8 +40,13 @@ target_include_directories( # Use an OBJECT library so we only compile these helper source files only once add_library( - cudf_benchmark_common OBJECT "${CUDF_SOURCE_DIR}/tests/utilities/random_seed.cpp" - synchronization/synchronization.cpp io/cuio_common.cpp + cudf_benchmark_common OBJECT + "${CUDF_SOURCE_DIR}/tests/utilities/random_seed.cpp" + synchronization/synchronization.cpp + io/cuio_common.cpp + common/table_utilities.cpp + common/benchmark_utilities.cpp + common/nvbench_utilities.cpp ) target_link_libraries(cudf_benchmark_common PRIVATE cudf_datagen $) add_custom_command( diff --git a/cpp/benchmarks/common/benchmark_utilities.cpp b/cpp/benchmarks/common/benchmark_utilities.cpp new file mode 100644 index 00000000000..0b9fc17e779 --- /dev/null +++ b/cpp/benchmarks/common/benchmark_utilities.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "benchmark_utilities.hpp" + +void set_items_processed(::benchmark::State& state, int64_t items_processed_per_iteration) +{ + state.SetItemsProcessed(state.iterations() * items_processed_per_iteration); +} + +void set_bytes_processed(::benchmark::State& state, int64_t bytes_processed_per_iteration) +{ + state.SetBytesProcessed(state.iterations() * bytes_processed_per_iteration); +} diff --git a/cpp/benchmarks/common/benchmark_utilities.hpp b/cpp/benchmarks/common/benchmark_utilities.hpp new file mode 100644 index 00000000000..c5c80e73674 --- /dev/null +++ b/cpp/benchmarks/common/benchmark_utilities.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +/** + * @brief Sets the number of items processed during the benchmark. + * + * This function could be used instead of ::benchmark::State.SetItemsProcessed() + * to avoid repeatedly computing ::benchmark::State.iterations() * items_processed_per_iteration. + * + * @param state the benchmark state + * @param items_processed_per_iteration number of items processed per iteration + */ +void set_items_processed(::benchmark::State& state, int64_t items_processed_per_iteration); + +/** + * @brief Sets the number of bytes processed during the benchmark. 
+ * + * This function could be used instead of ::benchmark::State.SetBytesProcessed() + * to avoid repeatedly computing ::benchmark::State.iterations() * bytes_processed_per_iteration. + * + * @param state the benchmark state + * @param bytes_processed_per_iteration number of bytes processed per iteration + */ +void set_bytes_processed(::benchmark::State& state, int64_t bytes_processed_per_iteration); diff --git a/cpp/benchmarks/common/nvbench_utilities.cpp b/cpp/benchmarks/common/nvbench_utilities.cpp new file mode 100644 index 00000000000..c740eaa52f4 --- /dev/null +++ b/cpp/benchmarks/common/nvbench_utilities.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nvbench_utilities.hpp" + +#include <nvbench/nvbench.cuh> + +// This function is copied over from +// https://github.com/NVIDIA/nvbench/blob/a171514056e5d6a7f52a035dd6c812fa301d4f4f/nvbench/detail/measure_cold.cu#L190-L224. 
+void set_throughputs(nvbench::state& state) +{ + double avg_cuda_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + + if (const auto items = state.get_element_count(); items != 0) { + auto& summ = state.add_summary("nv/cold/bw/item_rate"); + summ.set_string("name", "Elem/s"); + summ.set_string("hint", "item_rate"); + summ.set_string("description", "Number of input elements processed per second"); + summ.set_float64("value", static_cast(items) / avg_cuda_time); + } + + if (const auto bytes = state.get_global_memory_rw_bytes(); bytes != 0) { + const auto avg_used_gmem_bw = static_cast(bytes) / avg_cuda_time; + { + auto& summ = state.add_summary("nv/cold/bw/global/bytes_per_second"); + summ.set_string("name", "GlobalMem BW"); + summ.set_string("hint", "byte_rate"); + summ.set_string("description", + "Number of bytes read/written per second to the CUDA " + "device's global memory"); + summ.set_float64("value", avg_used_gmem_bw); + } + + { + const auto peak_gmem_bw = + static_cast(state.get_device()->get_global_memory_bus_bandwidth()); + + auto& summ = state.add_summary("nv/cold/bw/global/utilization"); + summ.set_string("name", "BWUtil"); + summ.set_string("hint", "percentage"); + summ.set_string("description", + "Global device memory utilization as a percentage of the " + "device's peak bandwidth"); + summ.set_float64("value", avg_used_gmem_bw / peak_gmem_bw); + } + } +} diff --git a/cpp/benchmarks/common/nvbench_utilities.hpp b/cpp/benchmarks/common/nvbench_utilities.hpp new file mode 100644 index 00000000000..98d879efac5 --- /dev/null +++ b/cpp/benchmarks/common/nvbench_utilities.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace nvbench { +struct state; +} + +/** + * @brief Sets throughput statistics, such as "Elem/s", "GlobalMem BW", and "BWUtil" for the + * nvbench results summary. + * + * This function could be used to work around a known issue that the throughput statistics + * should be added before the nvbench::state.exec() call, otherwise they will not be printed + * in the summary. See https://github.com/NVIDIA/nvbench/issues/175 for more details. + */ +void set_throughputs(nvbench::state& state); diff --git a/cpp/benchmarks/common/table_utilities.cpp b/cpp/benchmarks/common/table_utilities.cpp new file mode 100644 index 00000000000..a6fbdac9fb8 --- /dev/null +++ b/cpp/benchmarks/common/table_utilities.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "table_utilities.hpp" + +#include +#include + +#include + +int64_t estimate_size(cudf::column_view const& col) +{ + return estimate_size(cudf::table_view({col})); +} + +int64_t estimate_size(cudf::table_view const& view) +{ + // Compute the size in bits for each row. + auto const row_sizes = cudf::row_bit_count(view); + // Accumulate the row sizes to compute a sum. + auto const agg = cudf::make_sum_aggregation(); + cudf::data_type sum_dtype{cudf::type_id::INT64}; + auto const total_size_scalar = cudf::reduce(*row_sizes, *agg, sum_dtype); + auto const total_size_in_bits = + static_cast*>(total_size_scalar.get())->value(); + // Convert the size in bits to the size in bytes. + return static_cast(std::ceil(static_cast(total_size_in_bits) / 8)); +} diff --git a/cpp/benchmarks/common/table_utilities.hpp b/cpp/benchmarks/common/table_utilities.hpp new file mode 100644 index 00000000000..04ee847d397 --- /dev/null +++ b/cpp/benchmarks/common/table_utilities.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +/** + * @brief Estimates the column size in bytes. + * + * @remark As this function internally uses cudf::row_bit_count() to estimate each row size + * and accumulates them, the returned estimate may be an inexact approximation in some + * cases. See cudf::row_bit_count() for more details. 
+ * + * @param view The column view to estimate its size + */ +int64_t estimate_size(cudf::column_view const& view); + +/** + * @brief Estimates the table size in bytes. + * + * @remark As this function internally uses cudf::row_bit_count() to estimate each row size + * and accumulates them, the returned estimate may be an inexact approximation in some + * cases. See cudf::row_bit_count() for more details. + * + * @param view The table view to estimate its size + */ +int64_t estimate_size(cudf::table_view const& view); diff --git a/cpp/benchmarks/reduction/anyall.cpp b/cpp/benchmarks/reduction/anyall.cpp index 8b1e71c1585..e9d23881764 100644 --- a/cpp/benchmarks/reduction/anyall.cpp +++ b/cpp/benchmarks/reduction/anyall.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,9 @@ * limitations under the License. */ +#include #include +#include #include #include @@ -42,6 +44,10 @@ void BM_reduction_anyall(benchmark::State& state, cuda_event_timer timer(state, true); auto result = cudf::reduce(*values, *agg, output_dtype); } + + // The benchmark takes a column and produces one scalar. + set_items_processed(state, column_size + 1); + set_bytes_processed(state, estimate_size(values->view()) + cudf::size_of(output_dtype)); } #define concat(a, b, c) a##b##c diff --git a/cpp/benchmarks/reduction/dictionary.cpp b/cpp/benchmarks/reduction/dictionary.cpp index c1c44c919ac..5095337dbb3 100644 --- a/cpp/benchmarks/reduction/dictionary.cpp +++ b/cpp/benchmarks/reduction/dictionary.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -52,6 +53,13 @@ void BM_reduction_dictionary(benchmark::State& state, cuda_event_timer timer(state, true); auto result = cudf::reduce(*values, *agg, output_dtype); } + + // The benchmark takes a column and produces one scalar. + set_items_processed(state, column_size + 1); + + // We don't set the metrics for the size read/written as row_bit_count() doesn't + // support the dictionary type yet (and neither does estimate_size()). + // See https://github.com/rapidsai/cudf/issues/16121 for details. } #define concat(a, b, c) a##b##c diff --git a/cpp/benchmarks/reduction/minmax.cpp b/cpp/benchmarks/reduction/minmax.cpp index 963c26692e7..050f2887221 100644 --- a/cpp/benchmarks/reduction/minmax.cpp +++ b/cpp/benchmarks/reduction/minmax.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,9 @@ * limitations under the License. */ +#include #include +#include #include #include @@ -28,14 +30,19 @@ template void BM_reduction(benchmark::State& state) { cudf::size_type const column_size{(cudf::size_type)state.range(0)}; - auto const dtype = cudf::type_to_id(); + auto const dtype_id = cudf::type_to_id(); auto const input_column = - create_random_column(dtype, row_count{column_size}, data_profile_builder().no_validity()); + create_random_column(dtype_id, row_count{column_size}, data_profile_builder().no_validity()); for (auto _ : state) { cuda_event_timer timer(state, true); auto result = cudf::minmax(*input_column); } + + // The benchmark takes a column and produces two scalars. 
+ set_items_processed(state, column_size + 2); + cudf::data_type dtype = cudf::data_type{dtype_id}; + set_bytes_processed(state, estimate_size(input_column->view()) + 2 * cudf::size_of(dtype)); } #define concat(a, b, c) a##b##c diff --git a/cpp/benchmarks/reduction/rank.cpp b/cpp/benchmarks/reduction/rank.cpp index e55f3b9e09f..14876c80d3e 100644 --- a/cpp/benchmarks/reduction/rank.cpp +++ b/cpp/benchmarks/reduction/rank.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,8 @@ */ #include +#include +#include #include #include @@ -39,11 +41,18 @@ static void nvbench_reduction_scan(nvbench::state& state, nvbench::type_listview(), 2); cudf::column_view input(new_tbl->view().column(0)); + std::unique_ptr result = nullptr; state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view stream_view{launch.get_stream()}; - auto result = cudf::detail::inclusive_dense_rank_scan( + result = cudf::detail::inclusive_dense_rank_scan( input, stream_view, rmm::mr::get_current_device_resource()); }); + + state.add_element_count(input.size()); + state.add_global_memory_reads(estimate_size(input)); + state.add_global_memory_writes(estimate_size(result->view())); + + set_throughputs(state); } using data_type = nvbench::type_list; diff --git a/cpp/benchmarks/reduction/reduce.cpp b/cpp/benchmarks/reduction/reduce.cpp index 5bd3e2e3bba..63c96f4fe9e 100644 --- a/cpp/benchmarks/reduction/reduce.cpp +++ b/cpp/benchmarks/reduction/reduce.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,9 @@ * limitations under the License. 
*/ +#include #include +#include #include #include @@ -46,6 +48,10 @@ void BM_reduction(benchmark::State& state, std::unique_ptrview()) + cudf::size_of(output_dtype)); } #define concat(a, b, c) a##b##c diff --git a/cpp/benchmarks/reduction/scan.cpp b/cpp/benchmarks/reduction/scan.cpp index 8c9883ece9c..dc05aad9807 100644 --- a/cpp/benchmarks/reduction/scan.cpp +++ b/cpp/benchmarks/reduction/scan.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,9 @@ * limitations under the License. */ +#include #include +#include #include #include @@ -34,11 +36,16 @@ static void BM_reduction_scan(benchmark::State& state, bool include_nulls) auto const column = create_random_column(dtype, row_count{n_rows}); if (!include_nulls) column->set_null_mask(rmm::device_buffer{}, 0); + std::unique_ptr result = nullptr; for (auto _ : state) { cuda_event_timer timer(state, true); - auto result = cudf::scan( + result = cudf::scan( *column, *cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE); } + + // The benchmark takes a column and produces a new column of the same size as input. + set_items_processed(state, n_rows * 2); + set_bytes_processed(state, estimate_size(column->view()) + estimate_size(result->view())); } #define SCAN_BENCHMARK_DEFINE(name, type, nulls) \ diff --git a/cpp/benchmarks/reduction/scan_structs.cpp b/cpp/benchmarks/reduction/scan_structs.cpp index ee97b54fbef..a781f75a314 100644 --- a/cpp/benchmarks/reduction/scan_structs.cpp +++ b/cpp/benchmarks/reduction/scan_structs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,6 +15,8 @@ */ #include +#include +#include #include #include @@ -45,16 +47,24 @@ static void nvbench_structs_scan(nvbench::state& state) auto [null_mask, null_count] = create_random_null_mask(size, null_probability); auto const input = cudf::make_structs_column( size, std::move(data_table->release()), null_count, std::move(null_mask)); + auto input_view = input->view(); auto const agg = cudf::make_min_aggregation(); auto const null_policy = static_cast(state.get_int64("null_policy")); auto const stream = cudf::get_default_stream(); state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + std::unique_ptr result = nullptr; state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - auto const result = cudf::detail::scan_inclusive( - *input, *agg, null_policy, stream, rmm::mr::get_current_device_resource()); + result = cudf::detail::scan_inclusive( + input_view, *agg, null_policy, stream, rmm::mr::get_current_device_resource()); }); + + state.add_element_count(input_view.size()); + state.add_global_memory_reads(estimate_size(input_view)); + state.add_global_memory_writes(estimate_size(result->view())); + + set_throughputs(state); } NVBENCH_BENCH(nvbench_structs_scan)