diff --git a/tests/atomics.cu b/tests/atomics.cu index f99aa9d..b1a4d38 100644 --- a/tests/atomics.cu +++ b/tests/atomics.cu @@ -67,8 +67,8 @@ namespace kernels { template __global__ void test_add( - T* __restrict__ result, - const T* __restrict__ data, + T* result, + const T* data, std::size_t data_size) { // Notes: @@ -88,8 +88,8 @@ __global__ void test_add( template __global__ void test_subtract( - T* __restrict__ result, - const T* __restrict__ data, + T* result, + const T* data, std::size_t data_size) { // Notes: @@ -110,8 +110,8 @@ __global__ void test_subtract( template __global__ void test_exchange( - T* __restrict__ extra_datum, - T* __restrict__ data, + T* extra_datum, + T* data, std::size_t data_size) { auto global_thread_index = threadIdx.x + blockIdx.x * blockDim.x; @@ -129,8 +129,8 @@ __global__ void test_exchange( template __global__ void test_inc_dec_sequences( - T* __restrict__ aggregate, - SeqType* __restrict__ inc_dec_sequences) + T* aggregate, + SeqType* inc_dec_sequences) { auto global_thread_index = threadIdx.x + blockIdx.x * blockDim.x; @@ -151,8 +151,8 @@ template __global__ void execute_testcase( F testcase_device_function, size_t num_values_to_populate, - T* __restrict__ values_to_populate, - const Is* __restrict__ ... inputs + T* values_to_populate, + const Is* ... inputs ) { testcase_device_function(num_values_to_populate, values_to_populate, inputs...); @@ -180,7 +180,7 @@ auto execute_testcase_on_gpu( cuda::launch_configuration_t launch_config, size_t num_values_to_populate, T result_initial_fill_value, - Is* __restrict__ ... inputs) + Is* ... inputs) { cuda::device_t device { cuda::device::current::get() }; auto device_side_results { cuda::memory::device::make_unique(device, num_values_to_populate) }; @@ -222,7 +222,7 @@ auto execute_non_uniform_testcase_on_gpu( T result_initial_fill_value, cuda::grid::dimensions_t grid_dimensions, cuda::grid::block_dimensions_t block_dimensions, - Is* __restrict__ ... inputs) + Is* ... inputs) { auto launch_config { cuda::make_launch_config(grid_dimensions, block_dimensions) }; @@ -595,8 +595,8 @@ TEST_CASE_TEMPLATE("min - random values from host", T, INTEGER_TYPES, FLOAT_TYPE auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict aggregate, - const T* __restrict input_data) + T* aggregate, + const T* input_data) { namespace gi = kat::linear_grid; auto thread_element = input_data[gi::thread::global_index()]; @@ -645,8 +645,8 @@ TEST_CASE_TEMPLATE("max - random values from host", T, INTEGER_TYPES, FLOAT_TYPE auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict aggregate, - const T* __restrict input_data) + T* aggregate, + const T* input_data) { namespace gi = kat::linear_grid; auto thread_element = input_data[gi::thread::global_index()]; @@ -691,8 +691,8 @@ TEST_CASE_TEMPLATE("min - single outlier", T, INTEGER_TYPES, FLOAT_TYPES) { auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict aggregate, - const T* __restrict input_data) + T* aggregate, + const T* input_data) { namespace gi = kat::linear_grid; auto thread_element = input_data[gi::thread::global_index()]; @@ -738,8 +738,8 @@ TEST_CASE_TEMPLATE("max - single outlier", T, INTEGER_TYPES, FLOAT_TYPES) { auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict aggregate, - const T* __restrict input_data) + T* aggregate, + const T* input_data) { namespace gi = kat::linear_grid; auto thread_element = input_data[gi::thread::global_index()]; @@ -784,8 +784,8 @@ TEST_CASE_TEMPLATE("logical_and - single outlier", T, INTEGER_TYPES) { auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict aggregate, - const T* __restrict input_data) + T* aggregate, + const T* input_data) { namespace gi = kat::linear_grid; auto thread_element = input_data[gi::thread::global_index()]; @@ -828,8 +828,8 @@ TEST_CASE_TEMPLATE("logical_or - single outlier", T, INTEGER_TYPES) { auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict aggregate, - const T* __restrict input_data) + T* aggregate, + const T* input_data) { namespace gi = kat::linear_grid; auto thread_element = input_data[gi::thread::global_index()]; @@ -874,8 +874,8 @@ TEST_CASE_TEMPLATE("logical_xor - single outlier 0", T, INTEGER_TYPES) { auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict aggregate, - const T* __restrict input_data) + T* aggregate, + const T* input_data) { namespace gi = kat::linear_grid; auto thread_element = input_data[gi::thread::global_index()]; @@ -919,8 +919,8 @@ TEST_CASE_TEMPLATE("logical_xor - single outlier 1", T, INTEGER_TYPES) { auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict aggregate, - const T* __restrict input_data) + T* aggregate, + const T* input_data) { namespace gi = kat::linear_grid; auto thread_element = input_data[gi::thread::global_index()]; @@ -963,7 +963,7 @@ TEST_CASE_TEMPLATE("logical_not - single non-negator", T, INTEGER_TYPES) { auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict aggregate) + T* aggregate) { namespace gi = kat::linear_grid; @@ -1004,7 +1004,7 @@ TEST_CASE_TEMPLATE("logical_not - single negater", T, INTEGER_TYPES) { auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict aggregate) + T* aggregate) { namespace gi = kat::linear_grid; @@ -1049,8 +1049,8 @@ TEST_CASE_TEMPLATE("logical_not - by random threads", T, INTEGER_TYPES) { auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict target, - const fake_bool* __restrict perform_op_indicators) + T* target, + const fake_bool* perform_op_indicators) { namespace gi = kat::linear_grid; bool perform_op = perform_op_indicators[gi::thread::global_index()]; @@ -1103,8 +1103,8 @@ TEST_CASE_TEMPLATE("bitwise_and - single outliers", T, INTEGER_TYPES) { auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict aggregate, - const T* __restrict input_data) + T* aggregate, + const T* input_data) { namespace gi = kat::linear_grid; auto thread_element = input_data[gi::thread::global_index()]; @@ -1153,8 +1153,8 @@ TEST_CASE_TEMPLATE("bitwise_or - single outliers", T, INTEGER_TYPES) { auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict aggregate, - const T* __restrict input_data) + T* aggregate, + const T* input_data) { namespace gi = kat::linear_grid; auto thread_element = input_data[gi::thread::global_index()]; @@ -1200,8 +1200,8 @@ TEST_CASE_TEMPLATE("bitwise_xor - random values from host", T, INTEGER_TYPES) { auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict target, - const T* __restrict input_data) + T* target, + const T* input_data) { namespace gi = kat::linear_grid; auto thread_element = input_data[gi::thread::global_index()]; @@ -1249,8 +1249,8 @@ TEST_CASE_TEMPLATE("bitwise_not - by random threads", T, INTEGER_TYPES) { auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict target, - const fake_bool* __restrict perform_op_indicators) + T* target, + const fake_bool* perform_op_indicators) { namespace gi = kat::linear_grid; bool perform_op = perform_op_indicators[gi::thread::global_index()]; @@ -1315,8 +1315,8 @@ TEST_CASE_TEMPLATE("set_bit - few outliers", T, long int) { // INTEGER_TYPES) { auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict target, - const bit_index_type* __restrict bit_indices + T* target, + const bit_index_type* bit_indices ) { namespace gi = kat::linear_grid; @@ -1380,8 +1380,8 @@ TEST_CASE_TEMPLATE("unset_bit - few outliers", T, long int) { // INTEGER_TYPES) auto testcase_device_function = [=] KAT_DEV ( size_t, - T* __restrict target, - const bit_index_type* __restrict bit_indices + T* target, + const bit_index_type* bit_indices ) { namespace gi = kat::linear_grid; diff --git a/tests/builtins.cu b/tests/builtins.cu index 59ec975..1c7c019 100644 --- a/tests/builtins.cu +++ b/tests/builtins.cu @@ -115,9 +115,9 @@ template __global__ void execute_testcases( // F f, size_t num_checks, - fake_bool* __restrict__ execution_complete, - R* __restrict__ results, - const Is* __restrict__ ... inputs + fake_bool* execution_complete, + R* results, + const Is* ... inputs ) { auto global_thread_index = threadIdx.x + blockIdx.x * blockDim.x; @@ -244,10 +244,10 @@ void check_results( size_t num_checks, const char* testcase_name, // perhaps add another parameter for specific individual-check details? - const R* __restrict__ actual_results, + const R* actual_results, F expected_result_retriever, optional comparison_tolerance_fraction, - const Is* __restrict__... inputs) + const Is* ... inputs) { std::stringstream ss; auto index_width = set_width_for_up_to(num_checks); @@ -295,7 +295,7 @@ auto execute_testcase_on_gpu( const char* testcase_name, cuda::launch_configuration_t launch_config, size_t num_checks, - Is* __restrict__ ... inputs) + Is* ... inputs) { cuda::device_t device { cuda::device::current::get() }; auto device_side_results { cuda::memory::device::make_unique(device, num_checks) }; @@ -333,13 +333,13 @@ auto execute_testcase_on_gpu( template void execute_testcase_on_gpu_and_check( std::index_sequence is, - const R* __restrict__ expected_results, + const R* expected_results, K testcase_kernel, const char* testcase_name, cuda::launch_configuration_t launch_config, size_t num_checks, optional comparison_tolerance_fraction, - Is* __restrict__ ... inputs) + Is* ... inputs) { auto host_side_results = execute_testcase_on_gpu( tag{}, @@ -365,10 +365,10 @@ void execute_testcase_on_gpu_and_check( template void execute_uniform_builtin_testcase_on_gpu_and_check( DeviceFunctionHook dfh, - const R* __restrict__ expected_results, + const R* expected_results, size_t num_checks, optional comparison_tolerance_fraction, - Is* __restrict__ ... inputs) + Is* ... inputs) { auto block_size { 128 }; auto num_grid_blocks { div_rounding_up(num_checks, block_size) }; @@ -400,12 +400,12 @@ void execute_uniform_builtin_testcase_on_gpu_and_check( template void execute_non_uniform_builtin_testcase_on_gpu_and_check( DeviceFunctionHook dfh, - const R* __restrict__ expected_results, + const R* expected_results, size_t num_checks, cuda::grid::dimension_t num_grid_blocks, cuda::grid::block_dimension_t block_size, optional comparison_tolerance_fraction, - Is* __restrict__ ... inputs) + Is* ... inputs) { auto launch_config { cuda::make_launch_config(num_grid_blocks, block_size) }; // TODO: Should we check that num_checks is equal to the number of grid threads? diff --git a/tests/sequence_ops.cu b/tests/sequence_ops.cu index 00d86ff..44560c2 100644 --- a/tests/sequence_ops.cu +++ b/tests/sequence_ops.cu @@ -52,8 +52,8 @@ template __global__ void execute_testcase( F testcase_device_function, size_t num_values_to_populate, - T* __restrict__ values_to_populate, - const Is* __restrict__ ... inputs + T* values_to_populate, + const Is* ... inputs ) { testcase_device_function(num_values_to_populate, values_to_populate, inputs...); @@ -123,10 +123,10 @@ template void check_results( std::string title, size_t num_values_to_check, - const T* __restrict__ actual_values, + const T* actual_values, F expected_value_retriever, optional comparison_tolerance_fraction, - const Is* __restrict__... inputs) + const Is* ... inputs) { std::stringstream ss; auto index_width = set_width_for_up_to(num_values_to_check); @@ -158,10 +158,10 @@ void check_results( template void check_results( size_t num_values_to_check, - const T* __restrict__ actual_values, + const T* actual_values, F expected_value_retriever, optional comparison_tolerance_fraction, - const Is* __restrict__... inputs) + const Is* ... inputs) { return check_results( std::string("testcase ") + doctest::current_test_name(), @@ -193,7 +193,7 @@ auto execute_testcase_on_gpu( F testcase_device_function, cuda::launch_configuration_t launch_config, size_t num_values_to_populate, - Is* __restrict__ ... inputs) + Is* ... inputs) { cuda::device_t device { cuda::device::current::get() }; auto device_side_results { cuda::memory::device::make_unique(device, num_values_to_populate) }; @@ -231,7 +231,7 @@ void execute_non_uniform_testcase_on_gpu_and_check( cuda::grid::dimensions_t grid_dimensions, cuda::grid::block_dimensions_t block_dimensions, optional comparison_tolerance_fraction, - Is* __restrict__ ... inputs) + Is* ... inputs) { auto launch_config { cuda::make_launch_config(grid_dimensions, block_dimensions) }; @@ -261,7 +261,7 @@ auto execute_non_uniform_testcase_on_gpu( size_t num_values_to_populate, cuda::grid::dimensions_t grid_dimensions, cuda::grid::block_dimensions_t block_dimensions, - Is* __restrict__ ... inputs) + Is* ... inputs) { auto launch_config { cuda::make_launch_config(grid_dimensions, block_dimensions) }; @@ -292,10 +292,10 @@ TEST_CASE("append_to_global_memory") { // template // KAT_FD void collaborative_append_to_global_memory( -// T* __restrict__ global_output, -// Size* __restrict__ global_output_length, -// T* __restrict__ fragment_to_append, -// Size __restrict__ fragment_length) +// T* global_output, +// Size* global_output_length, +// T* fragment_to_append, +// Size fragment_length) }