From ae4e50e35e8d8e7e1c64ce114eafba0601de34b7 Mon Sep 17 00:00:00 2001 From: skykongkong8 <ss.kong@samsung.com> Date: Mon, 19 Feb 2024 09:37:22 +0900 Subject: [PATCH] [ BLAS ] Consider NaN case of output vector - With default output scalar multiplier value beta, output Z might contain NaN values. **Self evaluation:** 1. Build test: [X]Passed [ ]Failed [ ]Skipped 2. Run test: [X]Passed [ ]Failed [ ]Skipped Signed-off-by: skykongkong8 <ss.kong@samsung.com> --- nntrainer/tensor/blas_interface.cpp | 73 ++++++++++++----------------- 1 file changed, 29 insertions(+), 44 deletions(-) diff --git a/nntrainer/tensor/blas_interface.cpp b/nntrainer/tensor/blas_interface.cpp index 3ce60ec782..fbcf899ad5 100644 --- a/nntrainer/tensor/blas_interface.cpp +++ b/nntrainer/tensor/blas_interface.cpp @@ -386,15 +386,12 @@ void ele_mul(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z, #if (defined USE__FP16 && USE_NEON) nntrainer::neon::ele_mul(N, X, Y, Z, alpha, beta); #else - if (beta != 0.f) { - for (unsigned int i = 0; i < N; ++i) { + for (unsigned int i = 0; i < N; ++i) { + if (beta != 0.f) Z[i] = static_cast<_FP16>(alpha) * X[i] * Y[i] + static_cast<_FP16>(beta) * Z[i]; - } - } else { - for (unsigned int i = 0; i < N; ++i) { + else Z[i] = static_cast<_FP16>(alpha) * X[i] * Y[i]; - } } #endif } @@ -404,15 +401,12 @@ void ele_add(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z, #if (defined USE__FP16 && USE_NEON) nntrainer::neon::ele_add(N, X, Y, Z, alpha, beta); #else - if (beta != 0.f) { - for (unsigned int i = 0; i < N; ++i) { + for (unsigned int i = 0; i < N; ++i) { + if (beta != 0.f) Z[i] = X[i] + static_cast<_FP16>(alpha) * Y[i] + static_cast<_FP16>(beta) * Z[i]; - } - } else { - for (unsigned int i = 0; i < N; ++i) { + else Z[i] = X[i] + static_cast<_FP16>(alpha) * Y[i]; - } } #endif } @@ -422,15 +416,12 @@ void ele_sub(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z, #if (defined USE__FP16 && USE_NEON) nntrainer::neon::ele_sub(N, X, Y, Z, alpha, beta); #else - if (beta != 0.f) { - for (unsigned int i = 0; i < N; ++i) { + for (unsigned int i = 0; i < N; ++i) { + if (beta != 0.f) Z[i] = X[i] - static_cast<_FP16>(alpha) * Y[i] + static_cast<_FP16>(beta) * Z[i]; - } - } else { - for (unsigned int i = 0; i < N; ++i) { + else Z[i] = X[i] - static_cast<_FP16>(alpha) * Y[i]; - } } #endif } @@ -440,15 +431,12 @@ void ele_div(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z, #if (defined USE__FP16 && USE_NEON) nntrainer::neon::ele_div(N, X, Y, Z, alpha, beta); #else - if (beta != 0.f) { - for (unsigned int i = 0; i < N; ++i) { + for (unsigned int i = 0; i < N; ++i) { + if (beta != 0.f) Z[i] = X[i] / (static_cast<_FP16>(alpha) * Y[i]) + static_cast<_FP16>(beta) * Z[i]; - } - } else { - for (unsigned int i = 0; i < N; ++i) { + else Z[i] = X[i] / (static_cast<_FP16>(alpha) * Y[i]); - } } #endif } @@ -955,8 +943,10 @@ void ele_mul(const unsigned int N, const float *X, const float *Y, float *Z, nntrainer::neon::ele_mul(N, X, Y, Z, alpha, beta); #else for (unsigned int i = 0; i < N; ++i) { - Z[i] *= beta; - Z[i] = alpha * X[i] * Y[i]; + if (beta != 0.f) + Z[i] = alpha * X[i] * Y[i] + beta * Z[i]; + else + Z[i] = alpha * X[i] * Y[i]; } #endif } @@ -967,8 +957,10 @@ void ele_add(const unsigned int N, const float *X, const float *Y, float *Z, nntrainer::neon::ele_add(N, X, Y, Z, alpha, beta); #else for (unsigned int i = 0; i < N; ++i) { - Z[i] *= beta; - Z[i] = X[i] + alpha * Y[i]; + if (beta != 0.f) + Z[i] = X[i] + alpha * Y[i] + beta * Z[i]; + else + Z[i] = X[i] + alpha * Y[i]; } #endif } @@ -978,16 +970,13 @@ void ele_sub(const unsigned int N, const float *X, const float *Y, float *Z, #ifdef USE_NEON nntrainer::neon::ele_sub(N, X, Y, Z, alpha, beta); #else - if (beta != 0.f) { - for (unsigned int i = 0; i < N; ++i) { - Z[i] = X[i] - alpha * Y[i] + - beta * Z[i]; - } - } else { - for (unsigned int i = 0; i < N; ++i) { + for (unsigned int i = 0; i < N; ++i) { + if (beta != 0.f) + Z[i] = X[i] - alpha * Y[i] + beta * Z[i]; + else Z[i] = X[i] - alpha * Y[i]; - } } + #endif } @@ -996,15 +985,11 @@ void ele_div(const unsigned int N, const float *X, const float *Y, float *Z, #ifdef USE_NEON nntrainer::neon::ele_div(N, X, Y, Z, alpha, beta); #else - if (beta != 0.f) { - for (unsigned int i = 0; i < N; ++i) { - Z[i] = X[i] / (alpha * Y[i]) + - beta * Z[i]; - } - } else { - for (unsigned int i = 0; i < N; ++i) { + for (unsigned int i = 0; i < N; ++i) { + if (beta != 0.f) + Z[i] = X[i] / (alpha * Y[i]) + beta * Z[i]; + else Z[i] = X[i] / (alpha * Y[i]); - } } #endif }