Skip to content

Commit

Permalink
[ BLAS ] Consider NaN case of output vector
Browse files Browse the repository at this point in the history
- With the default value of the output scalar multiplier beta, the output vector Z might contain NaN values.

**Self evaluation:**
1. Build test:     [X]Passed [ ]Failed [ ]Skipped
2. Run test:     [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: skykongkong8 <[email protected]>
  • Loading branch information
skykongkong8 committed Feb 19, 2024
1 parent 7363546 commit ae4e50e
Showing 1 changed file with 29 additions and 44 deletions.
73 changes: 29 additions & 44 deletions nntrainer/tensor/blas_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,15 +386,12 @@ void ele_mul(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z,
#if (defined USE__FP16 && USE_NEON)
nntrainer::neon::ele_mul(N, X, Y, Z, alpha, beta);
#else
if (beta != 0.f) {
for (unsigned int i = 0; i < N; ++i) {
for (unsigned int i = 0; i < N; ++i) {
if (beta != 0.f)
Z[i] = static_cast<_FP16>(alpha) * X[i] * Y[i] +
static_cast<_FP16>(beta) * Z[i];
}
} else {
for (unsigned int i = 0; i < N; ++i) {
else
Z[i] = static_cast<_FP16>(alpha) * X[i] * Y[i];
}
}
#endif
}
Expand All @@ -404,15 +401,12 @@ void ele_add(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z,
#if (defined USE__FP16 && USE_NEON)
nntrainer::neon::ele_add(N, X, Y, Z, alpha, beta);
#else
if (beta != 0.f) {
for (unsigned int i = 0; i < N; ++i) {
for (unsigned int i = 0; i < N; ++i) {
if (beta != 0.f)
Z[i] = X[i] + static_cast<_FP16>(alpha) * Y[i] +
static_cast<_FP16>(beta) * Z[i];
}
} else {
for (unsigned int i = 0; i < N; ++i) {
else
Z[i] = X[i] + static_cast<_FP16>(alpha) * Y[i];
}
}
#endif
}
Expand All @@ -422,15 +416,12 @@ void ele_sub(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z,
#if (defined USE__FP16 && USE_NEON)
nntrainer::neon::ele_sub(N, X, Y, Z, alpha, beta);
#else
if (beta != 0.f) {
for (unsigned int i = 0; i < N; ++i) {
for (unsigned int i = 0; i < N; ++i) {
if (beta != 0.f)
Z[i] = X[i] - static_cast<_FP16>(alpha) * Y[i] +
static_cast<_FP16>(beta) * Z[i];
}
} else {
for (unsigned int i = 0; i < N; ++i) {
else
Z[i] = X[i] - static_cast<_FP16>(alpha) * Y[i];
}
}
#endif
}
Expand All @@ -440,15 +431,12 @@ void ele_div(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z,
#if (defined USE__FP16 && USE_NEON)
nntrainer::neon::ele_div(N, X, Y, Z, alpha, beta);
#else
if (beta != 0.f) {
for (unsigned int i = 0; i < N; ++i) {
for (unsigned int i = 0; i < N; ++i) {
if (beta != 0.f)
Z[i] = X[i] / (static_cast<_FP16>(alpha) * Y[i]) +
static_cast<_FP16>(beta) * Z[i];
}
} else {
for (unsigned int i = 0; i < N; ++i) {
else
Z[i] = X[i] / (static_cast<_FP16>(alpha) * Y[i]);
}
}
#endif
}
Expand Down Expand Up @@ -955,8 +943,10 @@ void ele_mul(const unsigned int N, const float *X, const float *Y, float *Z,
nntrainer::neon::ele_mul(N, X, Y, Z, alpha, beta);
#else
for (unsigned int i = 0; i < N; ++i) {
Z[i] *= beta;
Z[i] = alpha * X[i] * Y[i];
if (beta != 0.f)
Z[i] = alpha * X[i] * Y[i] + beta * Z[i];
else
Z[i] = alpha * X[i] * Y[i];
}
#endif
}
Expand All @@ -967,8 +957,10 @@ void ele_add(const unsigned int N, const float *X, const float *Y, float *Z,
nntrainer::neon::ele_add(N, X, Y, Z, alpha, beta);
#else
for (unsigned int i = 0; i < N; ++i) {
Z[i] *= beta;
Z[i] = X[i] + alpha * Y[i];
if (beta != 0.f)
Z[i] = X[i] + alpha * Y[i] + beta * Z[i];
else
Z[i] = X[i] + alpha * Y[i];
}
#endif
}
Expand All @@ -978,16 +970,13 @@ void ele_sub(const unsigned int N, const float *X, const float *Y, float *Z,
#ifdef USE_NEON
nntrainer::neon::ele_sub(N, X, Y, Z, alpha, beta);
#else
if (beta != 0.f) {
for (unsigned int i = 0; i < N; ++i) {
Z[i] = X[i] - alpha * Y[i] +
beta * Z[i];
}
} else {
for (unsigned int i = 0; i < N; ++i) {
for (unsigned int i = 0; i < N; ++i) {
if (beta != 0.f)
Z[i] = X[i] - alpha * Y[i] + beta * Z[i];
else
Z[i] = X[i] - alpha * Y[i];
}
}

#endif
}

Expand All @@ -996,15 +985,11 @@ void ele_div(const unsigned int N, const float *X, const float *Y, float *Z,
#ifdef USE_NEON
nntrainer::neon::ele_div(N, X, Y, Z, alpha, beta);
#else
if (beta != 0.f) {
for (unsigned int i = 0; i < N; ++i) {
Z[i] = X[i] / (alpha * Y[i]) +
beta * Z[i];
}
} else {
for (unsigned int i = 0; i < N; ++i) {
for (unsigned int i = 0; i < N; ++i) {
if (beta != 0.f)
Z[i] = X[i] / (alpha * Y[i]) + beta * Z[i];
else
Z[i] = X[i] / (alpha * Y[i]);
}
}
#endif
}
Expand Down

0 comments on commit ae4e50e

Please sign in to comment.