[GPU/OpenCL] Rebased and Changes for Addition Layer
Rebased to main
Added LayerSemanticsGPU for Addition Layer

Signed-off-by: yash.singh <[email protected]>
yashSingh0723 committed Jun 6, 2024
Commit 9976775 (merge of parents e69d4ad and 11c096b)
Showing 4 changed files with 5 additions and 15 deletions.
nntrainer/layers/cl_layers/blas_kernels.cpp (1 addition, 1 deletion)
@@ -313,7 +313,7 @@ void addition_cl(const float *input, float *res,
                   unsigned int size, RunLayerContext &context) {
 
   bool result = false;
 
   do {
-    result = result =
+    result =
       context.clCreateKernel(addition_cl_kernel_, context.LayerKernel::ADD,
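
For context, the changed line sits inside nntrainer's usual OpenCL setup idiom: a do { ... } while (false) block in which each step assigns result exactly once and breaks on failure, so the duplicated "result = result =" reads as a rebase artifact rather than deliberate chaining. A minimal self-contained sketch of that idiom follows, with a placeholder step() standing in for calls such as context.clCreateKernel (not nntrainer code):

#include <iostream>

// Placeholder for setup calls that return a success flag; the index
// only controls where the chain is made to fail in this demo.
static bool step(int i) { return i < 2; }

int main() {
  bool result = false;

  do {
    result = step(0); // first setup call
    if (!result)
      break;          // skip the remaining steps on failure

    result = step(1); // second setup call
    if (!result)
      break;

    result = step(2); // fails here; the block exits with result == false
  } while (false);

  std::cout << (result ? "ok" : "failed") << '\n';
  return result ? 0 : 1;
}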
nntrainer/layers/cl_layers/blas_kernels.h (2 additions, 3 deletions)
@@ -101,7 +101,6 @@ void sgemm_cl(const float *A, const float *B, float *C, unsigned int M,
               unsigned int ldb, unsigned int ldc, RunLayerContext &context);
 
 /**
-<<<<<<< HEAD
  * @brief     fp16 sgemm computation : Y = op(A)*op(B) + C,
  *            where op(X) is one of X or X**T
  * @param[in] A fp16 * for Matrix A
@@ -118,7 +117,8 @@ void sgemm_cl(const float *A, const float *B, float *C, unsigned int M,
 void sgemm_cl(const __fp16 *A, const __fp16 *B, __fp16 *C, unsigned int M,
               unsigned int N, unsigned int K, unsigned int lda,
               unsigned int ldb, unsigned int ldc, RunLayerContext &context);
-=======
+
+/**
  * @brief     addition : sum of all input vectors
  * @param[in] input float * for input
  * @param[in] res float * for result/output
@@ -127,7 +127,6 @@ void sgemm_cl(const __fp16 *A, const __fp16 *B, __fp16 *C, unsigned int M,
  */
 void addition_cl(const float *input, float *res, unsigned int size,
                  RunLayerContext &context);
->>>>>>> [GPU/OpenCL] Addition Kernel added in reusable blas OpenCL kernels
 
 } // namespace nntrainer
 #endif /* __BLAS_KERNELS_H__ */
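
With the conflict markers gone, addition_cl is declared alongside the other BLAS wrappers with the same call shape. A hedged sketch of a call site follows; it is illustrative only, since RunLayerContext and an initialized OpenCL backend come from nntrainer, and the accumulate-into-res behavior is taken from the @brief above rather than from a verified implementation:

#include <vector>

// Assumed visible from the nntrainer headers:
//   namespace nntrainer {
//     void addition_cl(const float *input, float *res, unsigned int size,
//                      RunLayerContext &context);
//   }
void accumulate_on_gpu(const std::vector<float> &input,
                       std::vector<float> &res,
                       nntrainer::RunLayerContext &context) {
  // Buffers are assumed to be the same length; per the doc comment the
  // kernel sums the input vector into the result buffer on the GPU.
  nntrainer::addition_cl(input.data(), res.data(),
                         static_cast<unsigned int>(res.size()), context);
}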
test/input_gen/gen_layer_tests.py (0 additions, 3 deletions)
@@ -889,6 +889,3 @@ def swiglu(inputs):
 
 added = K.layers.Add()
 record_single(added, [(3, 4, 3, 4), (3, 4, 3, 4)], "added_w32a32_2")
-
-added = K.layers.Add()
-record_single(added, [(20, 55, 50, 55), (20, 55, 50, 55)], "added_w32a32_3")
test/unittest/layers/unittest_layers_addition_cl.cpp (2 additions, 8 deletions)
@@ -26,7 +26,7 @@ auto semantic_addition_multi_gpu = LayerSemanticsParamType(
   nntrainer::AdditionLayerCL::type, {},
   LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT, false, 2);
 
-GTEST_PARAMETER_TEST(AdditionGPU, LayerSemantics,
+GTEST_PARAMETER_TEST(AdditionGPU, LayerSemanticsGpu,
                      ::testing::Values(semantic_addition_gpu,
                                        semantic_addition_multi_gpu));
 
@@ -40,11 +40,5 @@ auto addition_w32a32_2 = LayerGoldenTestParamType(
   "added_w32a32_2.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT, "nchw",
   "fp32", "fp32");
 
-auto addition_w32a32_3 = LayerGoldenTestParamType(
-  nntrainer::createLayer<nntrainer::AdditionLayerCL>, {},
-  "20:55:50:55,20:55:50:55", "added_w32a32_3.nnlayergolden",
-  LayerGoldenTestParamOptions::DEFAULT, "nchw", "fp32", "fp32");
-
 GTEST_PARAMETER_TEST(AdditionGPU, LayerGoldenTest,
-                     ::testing::Values(addition_w32a32, addition_w32a32_2,
-                                       addition_w32a32_3));
+                     ::testing::Values(addition_w32a32, addition_w32a32_2));
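
The rename in the first hunk is a correctness fix, not cosmetics: a GoogleTest parameterized instantiation must name an existing test-suite class, so instantiating AdditionGPU against LayerSemantics when the GPU suite is LayerSemanticsGpu would fail to compile or would attach the cases to the wrong suite. A minimal self-contained analogue, assuming GTEST_PARAMETER_TEST wraps INSTANTIATE_TEST_SUITE_P (the suite body is a placeholder, not the real semantic checks):

#include <gtest/gtest.h>

// Analogue of LayerSemanticsGpu: a value-parameterized test suite.
class LayerSemanticsGpu : public ::testing::TestWithParam<int> {};

TEST_P(LayerSemanticsGpu, CreatesLayer) {
  // Placeholder assertion run once per parameter value.
  EXPECT_GE(GetParam(), 0);
}

// Analogue of GTEST_PARAMETER_TEST(AdditionGPU, LayerSemanticsGpu, ...):
// the second argument must match the suite class defined above.
INSTANTIATE_TEST_SUITE_P(AdditionGPU, LayerSemanticsGpu,
                         ::testing::Values(1, 2));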
