Fixes #96: Get rid of __restrict__ qualifiers in our test programs.
eyalroz committed Sep 1, 2021
1 parent 8e15efd commit 4315b35
Showing 3 changed files with 69 additions and 69 deletions.
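For orientation, below is a minimal illustrative sketch of the change this commit applies throughout the diff. The kernel names (copy_with_hint, copy_without_hint) are hypothetical and not taken from the repository; only the signature pattern mirrors the hunks that follow. __restrict__ is a compiler-specific qualifier promising that the annotated pointers do not alias one another; dropping it cannot break a correct program, it merely withdraws that optimization hint.

#include <cstddef>

// Before the commit: pointer parameters carry the non-aliasing hint.
template <typename T>
__global__ void copy_with_hint(T* __restrict__ result, const T* __restrict__ data, std::size_t n)
{
    auto i = threadIdx.x + blockIdx.x * blockDim.x;
    if (i < n) { result[i] = data[i]; }
}

// After the commit: plain pointers; behavior is unchanged, only the aliasing hint is gone.
template <typename T>
__global__ void copy_without_hint(T* result, const T* data, std::size_t n)
{
    auto i = threadIdx.x + blockIdx.x * blockDim.x;
    if (i < n) { result[i] = data[i]; }
}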
88 changes: 44 additions & 44 deletions tests/atomics.cu
@@ -67,8 +67,8 @@ namespace kernels {

template <typename T, unsigned ElementsPerThread>
__global__ void test_add(
-T* __restrict__ result,
-const T* __restrict__ data,
+T* result,
+const T* data,
std::size_t data_size)
{
// Notes:
@@ -88,8 +88,8 @@ __global__ void test_add(

template <typename T, unsigned ElementsPerThread>
__global__ void test_subtract(
-T* __restrict__ result,
-const T* __restrict__ data,
+T* result,
+const T* data,
std::size_t data_size)
{
// Notes:
@@ -110,8 +110,8 @@ __global__ void test_subtract(

template <typename T, unsigned ElementsPerThread>
__global__ void test_exchange(
-T* __restrict__ extra_datum,
-T* __restrict__ data,
+T* extra_datum,
+T* data,
std::size_t data_size)
{
auto global_thread_index = threadIdx.x + blockIdx.x * blockDim.x;
@@ -129,8 +129,8 @@ __global__ void test_exchange(

template <typename T, typename SeqType>
__global__ void test_inc_dec_sequences(
-T* __restrict__ aggregate,
-SeqType* __restrict__ inc_dec_sequences)
+T* aggregate,
+SeqType* inc_dec_sequences)
{
auto global_thread_index = threadIdx.x + blockIdx.x * blockDim.x;

@@ -151,8 +151,8 @@ template <typename F, typename T, typename... Is>
__global__ void execute_testcase(
F testcase_device_function,
size_t num_values_to_populate,
-T* __restrict__ values_to_populate,
-const Is* __restrict__ ... inputs
+T* values_to_populate,
+const Is* ... inputs
)
{
testcase_device_function(num_values_to_populate, values_to_populate, inputs...);
@@ -180,7 +180,7 @@ auto execute_testcase_on_gpu(
cuda::launch_configuration_t launch_config,
size_t num_values_to_populate,
T result_initial_fill_value,
-Is* __restrict__ ... inputs)
+Is* ... inputs)
{
cuda::device_t device { cuda::device::current::get() };
auto device_side_results { cuda::memory::device::make_unique<T[]>(device, num_values_to_populate) };
@@ -222,7 +222,7 @@ auto execute_non_uniform_testcase_on_gpu(
T result_initial_fill_value,
cuda::grid::dimensions_t grid_dimensions,
cuda::grid::block_dimensions_t block_dimensions,
-Is* __restrict__ ... inputs)
+Is* ... inputs)
{
auto launch_config { cuda::make_launch_config(grid_dimensions, block_dimensions) };

@@ -595,8 +595,8 @@ TEST_CASE_TEMPLATE("min - random values from host", T, INTEGER_TYPES, FLOAT_TYPE
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict aggregate,
-const T* __restrict input_data
+T* aggregate,
+const T* input_data
{
namespace gi = kat::linear_grid;
auto thread_element = input_data[gi::thread::global_index()];
@@ -645,8 +645,8 @@ TEST_CASE_TEMPLATE("max - random values from host", T, INTEGER_TYPES, FLOAT_TYPE
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict aggregate,
-const T* __restrict input_data
+T* aggregate,
+const T* input_data
{
namespace gi = kat::linear_grid;
auto thread_element = input_data[gi::thread::global_index()];
@@ -691,8 +691,8 @@ TEST_CASE_TEMPLATE("min - single outlier", T, INTEGER_TYPES, FLOAT_TYPES) {
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict aggregate,
-const T* __restrict input_data
+T* aggregate,
+const T* input_data
{
namespace gi = kat::linear_grid;
auto thread_element = input_data[gi::thread::global_index()];
@@ -738,8 +738,8 @@ TEST_CASE_TEMPLATE("max - single outlier", T, INTEGER_TYPES, FLOAT_TYPES) {
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict aggregate,
-const T* __restrict input_data
+T* aggregate,
+const T* input_data
{
namespace gi = kat::linear_grid;
auto thread_element = input_data[gi::thread::global_index()];
@@ -784,8 +784,8 @@ TEST_CASE_TEMPLATE("logical_and - single outlier", T, INTEGER_TYPES) {
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict aggregate,
-const T* __restrict input_data
+T* aggregate,
+const T* input_data
{
namespace gi = kat::linear_grid;
auto thread_element = input_data[gi::thread::global_index()];
@@ -828,8 +828,8 @@ TEST_CASE_TEMPLATE("logical_or - single outlier", T, INTEGER_TYPES) {
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict aggregate,
-const T* __restrict input_data
+T* aggregate,
+const T* input_data
{
namespace gi = kat::linear_grid;
auto thread_element = input_data[gi::thread::global_index()];
@@ -874,8 +874,8 @@ TEST_CASE_TEMPLATE("logical_xor - single outlier 0", T, INTEGER_TYPES) {
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict aggregate,
-const T* __restrict input_data
+T* aggregate,
+const T* input_data
{
namespace gi = kat::linear_grid;
auto thread_element = input_data[gi::thread::global_index()];
@@ -919,8 +919,8 @@ TEST_CASE_TEMPLATE("logical_xor - single outlier 1", T, INTEGER_TYPES) {
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict aggregate,
-const T* __restrict input_data
+T* aggregate,
+const T* input_data
{
namespace gi = kat::linear_grid;
auto thread_element = input_data[gi::thread::global_index()];
@@ -963,7 +963,7 @@ TEST_CASE_TEMPLATE("logical_not - single non-negator", T, INTEGER_TYPES) {
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict aggregate)
+T* aggregate)
{
namespace gi = kat::linear_grid;

@@ -1004,7 +1004,7 @@ TEST_CASE_TEMPLATE("logical_not - single negater", T, INTEGER_TYPES) {
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict aggregate)
+T* aggregate)
{
namespace gi = kat::linear_grid;

@@ -1049,8 +1049,8 @@ TEST_CASE_TEMPLATE("logical_not - by random threads", T, INTEGER_TYPES) {
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict target,
-const fake_bool* __restrict perform_op_indicators)
+T* target,
+const fake_bool* perform_op_indicators)
{
namespace gi = kat::linear_grid;
bool perform_op = perform_op_indicators[gi::thread::global_index()];
@@ -1103,8 +1103,8 @@ TEST_CASE_TEMPLATE("bitwise_and - single outliers", T, INTEGER_TYPES) {
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict aggregate,
-const T* __restrict input_data
+T* aggregate,
+const T* input_data
{
namespace gi = kat::linear_grid;
auto thread_element = input_data[gi::thread::global_index()];
@@ -1153,8 +1153,8 @@ TEST_CASE_TEMPLATE("bitwise_or - single outliers", T, INTEGER_TYPES) {
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict aggregate,
-const T* __restrict input_data
+T* aggregate,
+const T* input_data
{
namespace gi = kat::linear_grid;
auto thread_element = input_data[gi::thread::global_index()];
@@ -1200,8 +1200,8 @@ TEST_CASE_TEMPLATE("bitwise_xor - random values from host", T, INTEGER_TYPES) {
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict target,
-const T* __restrict input_data
+T* target,
+const T* input_data
{
namespace gi = kat::linear_grid;
auto thread_element = input_data[gi::thread::global_index()];
@@ -1249,8 +1249,8 @@ TEST_CASE_TEMPLATE("bitwise_not - by random threads", T, INTEGER_TYPES) {
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict target,
-const fake_bool* __restrict perform_op_indicators)
+T* target,
+const fake_bool* perform_op_indicators)
{
namespace gi = kat::linear_grid;
bool perform_op = perform_op_indicators[gi::thread::global_index()];
@@ -1315,8 +1315,8 @@ TEST_CASE_TEMPLATE("set_bit - few outliers", T, long int) { // INTEGER_TYPES) {
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict target,
-const bit_index_type* __restrict bit_indices
+T* target,
+const bit_index_type* bit_indices
)
{
namespace gi = kat::linear_grid;
@@ -1380,8 +1380,8 @@ TEST_CASE_TEMPLATE("unset_bit - few outliers", T, long int) { // INTEGER_TYPES)
auto testcase_device_function =
[=] KAT_DEV (
size_t,
-T* __restrict target,
-const bit_index_type* __restrict bit_indices
+T* target,
+const bit_index_type* bit_indices
)
{
namespace gi = kat::linear_grid;
24 changes: 12 additions & 12 deletions tests/builtins.cu
@@ -115,9 +115,9 @@ template <typename DeviceFunctionHook, typename R, typename... Is>
__global__ void execute_testcases(
// F f,
size_t num_checks,
-fake_bool* __restrict__ execution_complete,
-R* __restrict__ results,
-const Is* __restrict__ ... inputs
+fake_bool* execution_complete,
+R* results,
+const Is* ... inputs
)
{
auto global_thread_index = threadIdx.x + blockIdx.x * blockDim.x;
@@ -244,10 +244,10 @@ void check_results(
size_t num_checks,
const char* testcase_name,
// perhaps add another parameter for specific individual-check details?
-const R* __restrict__ actual_results,
+const R* actual_results,
F expected_result_retriever,
optional<R> comparison_tolerance_fraction,
-const Is* __restrict__... inputs)
+const Is* ... inputs)
{
std::stringstream ss;
auto index_width = set_width_for_up_to(num_checks);
@@ -295,7 +295,7 @@ auto execute_testcase_on_gpu(
const char* testcase_name,
cuda::launch_configuration_t launch_config,
size_t num_checks,
-Is* __restrict__ ... inputs)
+Is* ... inputs)
{
cuda::device_t device { cuda::device::current::get() };
auto device_side_results { cuda::memory::device::make_unique<R[]>(device, num_checks) };
@@ -333,13 +333,13 @@ auto execute_testcase_on_gpu(
template <typename K, typename R, typename... Is, size_t... Indices>
void execute_testcase_on_gpu_and_check(
std::index_sequence<Indices...> is,
-const R* __restrict__ expected_results,
+const R* expected_results,
K testcase_kernel,
const char* testcase_name,
cuda::launch_configuration_t launch_config,
size_t num_checks,
optional<R> comparison_tolerance_fraction,
-Is* __restrict__ ... inputs)
+Is* ... inputs)
{
auto host_side_results = execute_testcase_on_gpu(
tag<R>{},
@@ -365,10 +365,10 @@ void execute_testcase_on_gpu_and_check(
template <typename DeviceFunctionHook, typename R, typename... Is>
void execute_uniform_builtin_testcase_on_gpu_and_check(
DeviceFunctionHook dfh,
-const R* __restrict__ expected_results,
+const R* expected_results,
size_t num_checks,
optional<R> comparison_tolerance_fraction,
-Is* __restrict__ ... inputs)
+Is* ... inputs)
{
auto block_size { 128 };
auto num_grid_blocks { div_rounding_up(num_checks, block_size) };
@@ -400,12 +400,12 @@ void execute_uniform_builtin_testcase_on_gpu_and_check(
template <typename DeviceFunctionHook, typename R, typename... Is>
void execute_non_uniform_builtin_testcase_on_gpu_and_check(
DeviceFunctionHook dfh,
-const R* __restrict__ expected_results,
+const R* expected_results,
size_t num_checks,
cuda::grid::dimension_t num_grid_blocks,
cuda::grid::block_dimension_t block_size,
optional<R> comparison_tolerance_fraction,
-Is* __restrict__ ... inputs)
+Is* ... inputs)
{
auto launch_config { cuda::make_launch_config(num_grid_blocks, block_size) };
// TODO: Should we check that num_checks is equal to the number of grid threads?
