Skip to content

Commit

Permalink
[blas/OpenCL] Added multiply OpenCL kernel and unit test
Browse files Browse the repository at this point in the history
Added sscal equivalent kernel and multiply function.
Added unit test setup to test standalone kernels.

Signed-off-by: Debadri Samaddar <[email protected]>
  • Loading branch information
s-debadri committed Jul 2, 2024
1 parent b9c27e9 commit 602bc5e
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 1 deletion.
12 changes: 12 additions & 0 deletions nntrainer/tensor/cl_operations/blas_kernel_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ namespace nntrainer {
Tensor dotCl(Tensor const &input, Tensor const &m, RunLayerContext &context,
bool trans = false, bool trans_m = false);

/**
* @brief Process data and dimensions for OpenCL dot operation
* @param[in] input Tensor
* @param[in] m Tensor
* @param[in] result Tensor
* @param[in] RunLayerContext reference
* @param[in] trans bool
* @param[in] trans_m bool
*/
Tensor dotCl(Tensor const &input, Tensor const &m, RunLayerContext &context,
bool trans = false, bool trans_m = false);

/**
* @brief Process data and dimensions for OpenCL dot operation
* @param[in] input Tensor
Expand Down
1 change: 1 addition & 0 deletions nntrainer/tensor/cl_operations/blas_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -421,4 +421,5 @@ void sscal_cl(float *X, const unsigned int N, const float alpha,

} while (false);
}

} // namespace nntrainer
2 changes: 1 addition & 1 deletion nntrainer/tensor/cl_operations/blas_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ void addition_cl(const __fp16 *input, __fp16 *res, unsigned int size,

/**
* @brief fp16 sscal: scale each element of X by alpha, in place
* @param[in] X __fp16 * input
* @param[in] X float * input
* @param[in] N unsigned int number of elements
* @param[in] alpha float multiplier
* @param[in] context RunLayerContext reference
Expand Down
1 change: 1 addition & 0 deletions nntrainer/tensor/cl_operations/blas_kernels_fp16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -441,4 +441,5 @@ void sscal_cl(__fp16 *X, const unsigned int N, const float alpha,

} while (false);
}

} // namespace nntrainer

0 comments on commit 602bc5e

Please sign in to comment.