Skip to content

Commit

Permalink
Use double for all scaling values and floating-point constant values at …
Browse files Browse the repository at this point in the history
…the Device Op API (#557)

* Use double as alpha/beta values type in reduce device op api

* Use double as alpha/beta values type in softmax device op api

* Use double as alpha/beta values type in multiple-reduce device op api

* Use double as epsilon value type in normalization/elementwise-normalization device op api
  • Loading branch information
qianfengz authored Jan 18, 2023
1 parent 1cfa876 commit 52abc2f
Show file tree
Hide file tree
Showing 24 changed files with 112 additions and 109 deletions.
14 changes: 7 additions & 7 deletions client_example/06_softmax/softmax4d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ int main(int argc, char* argv[])
ck::index_t num_elements =
std::accumulate(in_lengths.begin(), in_lengths.end(), 1, std::multiplies<ck::index_t>());

AccDataType alpha{2.0f};
AccDataType beta{2.0f};
double alpha{2.0};
double beta{2.0};

SimpleDeviceMem in(sizeof(InDataType) * num_elements);
SimpleDeviceMem out(sizeof(OutDataType) * num_elements);
Expand Down Expand Up @@ -82,8 +82,8 @@ int main(int argc, char* argv[])
auto argument_ptr = op_ptr->MakeArgumentPointer(in_lengths,
in_strides,
reduce_dims,
&alpha,
&beta,
alpha,
beta,
in.GetDeviceBuffer(),
out.GetDeviceBuffer(),
PassThrough{},
Expand Down Expand Up @@ -129,8 +129,8 @@ int main(int argc, char* argv[])
auto argument_ptr = op_ptr->MakeArgumentPointer(in_lengths,
in_strides,
reduce_dims,
&alpha,
&beta,
alpha,
beta,
in.GetDeviceBuffer(),
out.GetDeviceBuffer(),
PassThrough{},
Expand All @@ -147,4 +147,4 @@ int main(int argc, char* argv[])
}

return 0;
}
}
4 changes: 2 additions & 2 deletions client_example/15_reduce/reduce_nhwc_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ int main(int argc, char* argv[])
for(auto dim : reduce_dims)
reduce_length *= in_lengths[dim];

float alpha{1.0f};
float beta{0.0f};
double alpha{1.0};
double beta{0.0};

SimpleDeviceMem in(sizeof(InDataType) * num_in_elements);
SimpleDeviceMem out(sizeof(OutDataType) * num_out_elements);
Expand Down
8 changes: 4 additions & 4 deletions example/12_reduce/reduce_blockwise_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,8 @@ int reduce_blockwise_impl(bool do_verification,
arrOutLengths,
arrOutStrides,
reduceDims,
alpha,
beta,
static_cast<double>(alpha),
static_cast<double>(beta),
in.mData.data(),
nullptr,
out_ref.mData.data(),
Expand All @@ -295,8 +295,8 @@ int reduce_blockwise_impl(bool do_verification,
arrOutLengths,
arrOutStrides,
reduceDims,
alpha,
beta,
static_cast<double>(alpha),
static_cast<double>(beta),
in_dev.GetDeviceBuffer(),
nullptr,
out_dev.GetDeviceBuffer(),
Expand Down
12 changes: 6 additions & 6 deletions example/12_reduce/reduce_blockwise_two_call.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,8 @@ int main(int argc, char* argv[])
arrOutLengths,
arrOutStrides,
reduceDims,
alpha,
beta,
static_cast<double>(alpha),
static_cast<double>(beta),
in_1.mData.data(),
nullptr,
out_ref.mData.data(),
Expand All @@ -254,8 +254,8 @@ int main(int argc, char* argv[])
arrInLengths_2,
arrInStrides_2,
reduceDims_1,
1.0f,
0.0f,
1.0,
0.0,
in_1_dev.GetDeviceBuffer(),
nullptr,
in_2_dev.GetDeviceBuffer(),
Expand All @@ -278,8 +278,8 @@ int main(int argc, char* argv[])
arrOutLengths,
arrOutStrides,
reduceDims_2,
alpha,
beta,
static_cast<double>(alpha),
static_cast<double>(beta),
in_2_dev.GetDeviceBuffer(),
nullptr,
out_dev.GetDeviceBuffer(),
Expand Down
8 changes: 4 additions & 4 deletions example/12_reduce/reduce_multiblock_atomic_add_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ int reduce_multiblock_atomic_add_impl(bool do_verification,
arrOutLengths,
arrOutStrides,
reduceDims,
alpha,
beta,
static_cast<double>(alpha),
static_cast<double>(beta),
in.mData.data(),
nullptr,
out_ref.mData.data(),
Expand All @@ -208,8 +208,8 @@ int reduce_multiblock_atomic_add_impl(bool do_verification,
arrOutLengths,
arrOutStrides,
reduceDims,
alpha,
beta,
static_cast<double>(alpha),
static_cast<double>(beta),
in_dev.GetDeviceBuffer(),
nullptr,
out_dev.GetDeviceBuffer(),
Expand Down
12 changes: 6 additions & 6 deletions example/23_softmax/softmax_blockwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ class SimpleAppArgs
int option_index = 0;

public:
std::vector<size_t> inLengths = {8, 128, 2048};
std::vector<AccDataType> scales = {2.0f, 2.0f};
std::vector<size_t> inLengths = {8, 128, 2048};
std::vector<double> scales = {2.0, 2.0};

bool do_verification = true;
int init_method = 2;
Expand Down Expand Up @@ -151,8 +151,8 @@ int main(int argc, char* argv[])
auto inStrides = in.mDesc.GetStrides();
auto outStrides = out.mDesc.GetStrides();

AccDataType alpha = args.scales[0];
AccDataType beta = args.scales[1];
double alpha = args.scales[0];
double beta = args.scales[1];

std::cout << "in: " << in.mDesc << std::endl;
std::cout << "out: " << out.mDesc << std::endl;
Expand Down Expand Up @@ -221,8 +221,8 @@ int main(int argc, char* argv[])
auto argument_ptr = device_instance.MakeArgumentPointer(i_inLengths,
i_inStrides,
reduceDims,
&alpha,
&beta,
alpha,
beta,
in_dev.GetDeviceBuffer(),
out_dev.GetDeviceBuffer(),
PassThrough{},
Expand Down
8 changes: 4 additions & 4 deletions example/33_multiple_reduce/dual_reduce_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,8 @@ int mean_meansquare_dual_reduce_test(size_t n,
size_t invariant_total_length = n;
size_t reduce_total_length = h * w * c;

const AccDataType alpha = ck::type_convert<AccDataType>(1.0f);
const AccDataType beta = ck::type_convert<AccDataType>(0.0f);
const double alpha = 1.0f;
const double beta = 0.0f;

std::size_t num_thread = 1;

Expand Down Expand Up @@ -267,8 +267,8 @@ int mean_meansquare_dual_reduce_test(size_t n,
i_outLengths,
{i_outStrides, i_outStrides},
reduceDims,
{&alpha, &alpha},
{&beta, &beta},
{alpha, alpha},
{beta, beta},
in_dev.GetDeviceBuffer(),
{mean_dev.GetDeviceBuffer(), meansquare_dev.GetDeviceBuffer()},
ck::make_tuple(InElementwiseOperation_Mean{}, InElementwiseOperation_Meansquare{}),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ struct DeviceElementwiseNormalization : public BaseOperator
const std::vector<index_t> betaStrides,
const std::vector<index_t> yStrides,
const std::vector<index_t> reduceDims,
AccDataType epsilon,
double epsilon,
const std::array<const void*, NumInput> in_dev_buffers,
const void* p_gamma,
const void* p_beta,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ struct DeviceMultipleReduce : public BaseOperator
const std::array<index_t, NumOutputDim> outLengths,
const std::array<std::array<index_t, NumOutputDim>, NumReduction> outStrides,
const std::array<int, NumReduceDim> reduceDims,
const std::array<const void*, NumReduction> alphas,
const std::array<const void*, NumReduction> betas,
const std::array<double, NumReduction> alphas,
const std::array<double, NumReduction> betas,
const void* in_dev,
const std::array<void*, NumReduction> out_dev_buffers,
const InElementwiseOperationTuple in_elementwise_op_tuple,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ struct DeviceNormalization : public BaseOperator
const std::vector<index_t> betaStrides,
const std::vector<index_t> yStrides,
const std::vector<index_t> reduceDims,
AccDataType epsilon,
double epsilon,
const void* p_x,
const void* p_gamma,
const void* p_beta,
Expand Down
4 changes: 2 additions & 2 deletions include/ck/tensor_operation/gpu/device/device_reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ struct DeviceReduce : public BaseOperator
const std::array<index_t, NumOutDim> outLengths,
const std::array<index_t, NumOutDim> outStrides,
const std::array<int, NumReduceDim> reduceDims,
float alpha,
float beta,
double alpha,
double beta,
const void* in_dev,
const void* in_index_dev,
void* out_dev,
Expand Down
10 changes: 4 additions & 6 deletions include/ck/tensor_operation/gpu/device/device_softmax.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,8 @@ struct DeviceSoftmax : public BaseOperator
// @param[in] inLengths Input tensor extent(s) from high to low dimension
// @param[in] inStrides Input tensor stride(s) from high to low dimension
// @param[in] reduceDims The dimension(s) the normalization operation is applied
// @param[in] alpha Typeless pointer in host memory storing the alpha scaling
// value as type AccDataType
// @param[in] beta Typeless pointer in host memory storing the beta scaling
// value as type AccDataType
// @param[in] alpha Alpha scaling value, passed by value as double
// @param[in] beta Beta scaling value, passed by value as double
// @param[in] in_dev Typeless const pointer in device memory storing the input
// tensor
// @param out_dev Typeless pointer in device memory storing the output tensor
Expand All @@ -43,8 +41,8 @@ struct DeviceSoftmax : public BaseOperator
MakeArgumentPointer(const std::vector<index_t> inLengths,
const std::vector<index_t> inStrides,
const std::vector<int> reduceDims,
const void* alpha,
const void* beta,
double alpha,
double beta,
const void* in_dev,
void* out_dev,
InElementwiseOp in_elementwise_op,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,18 +270,18 @@ struct DeviceElementwiseNormalizationImpl
const std::vector<index_t> reduceDims,
XElementwiseOperation x_elementwise_op,
YElementwiseOperation y_elementwise_op,
AccDataType epsilon,
double epsilon,
const std::array<const void*, NumInput> in_dev_buffers,
const GammaDataType* p_gamma,
const BetaDataType* p_beta,
YDataType* p_y)
: epsilon_(epsilon),
p_gamma_(p_gamma),
: p_gamma_(p_gamma),
p_beta_(p_beta),
p_y_(p_y),
x_elementwise_op_(x_elementwise_op),
y_elementwise_op_(y_elementwise_op)
{
epsilon_ = static_cast<AccDataType>(epsilon);

Lengths_ = shuffle_tensor_dimensions<Rank, NumReduceDim>(lengths, reduceDims);
for(int i = 0; i < NumInput; i++)
Expand Down Expand Up @@ -543,7 +543,7 @@ struct DeviceElementwiseNormalizationImpl
const std::vector<index_t> betaStrides,
const std::vector<index_t> yStrides,
const std::vector<index_t> reduceDims,
AccDataType epsilon,
double epsilon,
const std::array<const void*, NumInput> in_dev_buffers,
const void* p_gamma,
const void* p_beta,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,8 @@ struct DeviceMultipleReduceMultiBlock : public DeviceMultipleReduce<Rank,
const std::array<index_t, NumOutputDim>& outLengths,
const std::array<std::array<index_t, NumOutputDim>, NumReduction>& outStridesArray,
const std::array<int, NumReduceDim>& reduceDims,
const std::array<const void*, NumReduction>& alphas,
const std::array<const void*, NumReduction>& betas,
const std::array<double, NumReduction>& alphas,
const std::array<double, NumReduction>& betas,
const void* in_dev,
const std::array<void*, NumReduction>& out_dev_buffers,
const InElementwiseOperationTuple in_elementwise_op_tuple,
Expand All @@ -286,8 +286,8 @@ struct DeviceMultipleReduceMultiBlock : public DeviceMultipleReduce<Rank,

for(size_t i = 0; i < NumReduction; i++)
{
alpha_values_(i) = *static_cast<const AccDataType*>(alphas[i]);
beta_values_(i) = *static_cast<const AccDataType*>(betas[i]);
alpha_values_(i) = static_cast<AccDataType>(alphas[i]);
beta_values_(i) = static_cast<AccDataType>(betas[i]);
};

in_dev_ = static_cast<const InDataType*>(in_dev);
Expand Down Expand Up @@ -547,8 +547,8 @@ struct DeviceMultipleReduceMultiBlock : public DeviceMultipleReduce<Rank,
const std::array<index_t, NumOutputDim> outLengths,
const std::array<std::array<index_t, NumOutputDim>, NumReduction> outStridesArray,
const std::array<int, NumReduceDim> reduceDims,
const std::array<const void*, NumReduction> alphas,
const std::array<const void*, NumReduction> betas,
const std::array<double, NumReduction> alphas,
const std::array<double, NumReduction> betas,
const void* in_dev,
const std::array<void*, NumReduction> out_dev_buffers,
const InElementwiseOperationTuple in_elementwise_op_tuple,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,8 @@ struct DeviceMultipleReduceThreadWise : public DeviceMultipleReduce<Rank,
const std::array<index_t, NumOutputDim>& outLengths,
const std::array<std::array<index_t, NumOutputDim>, NumReduction>& outStridesArray,
const std::array<int, NumReduceDim>& reduceDims,
const std::array<const void*, NumReduction>& alphas,
const std::array<const void*, NumReduction>& betas,
const std::array<double, NumReduction>& alphas,
const std::array<double, NumReduction>& betas,
const void* in_dev,
const std::array<void*, NumReduction>& out_dev_buffers,
const InElementwiseOperationTuple in_elementwise_op_tuple,
Expand All @@ -211,8 +211,8 @@ struct DeviceMultipleReduceThreadWise : public DeviceMultipleReduce<Rank,

for(size_t i = 0; i < NumReduction; i++)
{
alpha_values_(i) = *static_cast<const AccDataType*>(alphas[i]);
beta_values_(i) = *static_cast<const AccDataType*>(betas[i]);
alpha_values_(i) = static_cast<AccDataType>(alphas[i]);
beta_values_(i) = static_cast<AccDataType>(betas[i]);
};

in_dev_ = static_cast<const InDataType*>(in_dev);
Expand Down Expand Up @@ -374,8 +374,8 @@ struct DeviceMultipleReduceThreadWise : public DeviceMultipleReduce<Rank,
const std::array<index_t, NumOutputDim> outLengths,
const std::array<std::array<index_t, NumOutputDim>, NumReduction> outStridesArray,
const std::array<int, NumReduceDim> reduceDims,
const std::array<const void*, NumReduction> alphas,
const std::array<const void*, NumReduction> betas,
const std::array<double, NumReduction> alphas,
const std::array<double, NumReduction> betas,
const void* in_dev,
const std::array<void*, NumReduction> out_dev_buffers,
const InElementwiseOperationTuple in_elementwise_op_tuple,
Expand Down
Loading

0 comments on commit 52abc2f

Please sign in to comment.