-
Notifications
You must be signed in to change notification settings - Fork 82
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ BLAS ] Implement elementwise operations #2474
Changes from all commits
0141020
7c2d042
57eeff3
4a3a56c
80c13dd
b3a271c
e9f4330
736be61
02d47e2
b6d2914
9a3f5e4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -245,25 +245,6 @@ static void copy_int8_to_fp16(const unsigned int N, const uint8_t *X, | |||||||||||||
#endif | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
static void ewvm_FP16(const unsigned int N, const _FP16 *X, const _FP16 *Y, | ||||||||||||||
_FP16 *Z) { | ||||||||||||||
#if (defined USE__FP16 && USE_NEON) | ||||||||||||||
nntrainer::neon::ewvm(N, X, Y, Z); | ||||||||||||||
#else | ||||||||||||||
for (unsigned int i = 0; i < N; ++i) | ||||||||||||||
Z[i] = X[i] * Y[i]; | ||||||||||||||
#endif | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
static void ewva_FP16(const unsigned int N, const _FP16 *X, const _FP16 *Y, | ||||||||||||||
_FP16 *Z) { | ||||||||||||||
#if (defined USE__FP16 && USE_NEON) | ||||||||||||||
nntrainer::neon::ewva(N, X, Y, Z); | ||||||||||||||
#else | ||||||||||||||
for (unsigned int i = 0; i < N; ++i) | ||||||||||||||
Z[i] = X[i] + Y[i]; | ||||||||||||||
#endif | ||||||||||||||
} | ||||||||||||||
void sscal(const unsigned int N, const float alpha, _FP16 *X, const int incX) { | ||||||||||||||
unsigned int incx = abs(incX); | ||||||||||||||
|
||||||||||||||
|
@@ -400,12 +381,64 @@ void scopy_int8_to_float16(const unsigned int N, const uint8_t *X, | |||||||||||||
copy_int8_to_fp16(N, X, incX, Y, incY); | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
void ewvm(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z) { | ||||||||||||||
ewvm_FP16(N, X, Y, Z); | ||||||||||||||
void ele_mul(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z, | ||||||||||||||
float alpha, float beta) { | ||||||||||||||
#if (defined USE__FP16 && USE_NEON) | ||||||||||||||
nntrainer::neon::ele_mul(N, X, Y, Z, alpha, beta); | ||||||||||||||
#else | ||||||||||||||
for (unsigned int i = 0; i < N; ++i) { | ||||||||||||||
if (std::abs(beta) > __FLT_MIN__) | ||||||||||||||
Z[i] = static_cast<_FP16>(alpha) * X[i] * Y[i] + | ||||||||||||||
static_cast<_FP16>(beta) * Z[i]; | ||||||||||||||
else | ||||||||||||||
Z[i] = static_cast<_FP16>(alpha) * X[i] * Y[i]; | ||||||||||||||
Comment on lines
+390
to
+394
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. any reason to differentiate beta == 0 case?
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what if Z[i] != Z[i] ? (NaN in Z[i], or uninitialized Z) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. regardless of beta, wouldn't it cause an error anyway? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if beta is zero, Z = X * Y + beta * Z would be Z = X * Y. for the case where NaN is in Z[i] or uninitialized Z, it would cause an error. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. isn't NaN * 0 = NaN ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, what I mean is having beta != 0 condition to avoid NaN or uninitialized error seems offbeat. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe we could come up with a different way to handle these cases (e.g., check if tensor is initialized when using beta). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds more reasonable. |
||||||||||||||
} | ||||||||||||||
#endif | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
void ele_add(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z, | ||||||||||||||
float alpha, float beta) { | ||||||||||||||
#if (defined USE__FP16 && USE_NEON) | ||||||||||||||
nntrainer::neon::ele_add(N, X, Y, Z, alpha, beta); | ||||||||||||||
#else | ||||||||||||||
for (unsigned int i = 0; i < N; ++i) { | ||||||||||||||
if (std::abs(beta) > __FLT_MIN__) | ||||||||||||||
Z[i] = X[i] + static_cast<_FP16>(alpha) * Y[i] + | ||||||||||||||
static_cast<_FP16>(beta) * Z[i]; | ||||||||||||||
else | ||||||||||||||
Z[i] = X[i] + static_cast<_FP16>(alpha) * Y[i]; | ||||||||||||||
} | ||||||||||||||
#endif | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
void ele_sub(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z, | ||||||||||||||
float alpha, float beta) { | ||||||||||||||
#if (defined USE__FP16 && USE_NEON) | ||||||||||||||
nntrainer::neon::ele_sub(N, X, Y, Z, alpha, beta); | ||||||||||||||
#else | ||||||||||||||
for (unsigned int i = 0; i < N; ++i) { | ||||||||||||||
if (std::abs(beta) > __FLT_MIN__) | ||||||||||||||
Z[i] = X[i] - static_cast<_FP16>(alpha) * Y[i] + | ||||||||||||||
static_cast<_FP16>(beta) * Z[i]; | ||||||||||||||
else | ||||||||||||||
Z[i] = X[i] - static_cast<_FP16>(alpha) * Y[i]; | ||||||||||||||
} | ||||||||||||||
#endif | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
void ewva(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z) { | ||||||||||||||
ewva_FP16(N, X, Y, Z); | ||||||||||||||
void ele_div(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z, | ||||||||||||||
float alpha, float beta) { | ||||||||||||||
#if (defined USE__FP16 && USE_NEON) | ||||||||||||||
nntrainer::neon::ele_div(N, X, Y, Z, alpha, beta); | ||||||||||||||
#else | ||||||||||||||
for (unsigned int i = 0; i < N; ++i) { | ||||||||||||||
if (std::abs(beta) > __FLT_MIN__) | ||||||||||||||
Z[i] = X[i] / (static_cast<_FP16>(alpha) * Y[i]) + | ||||||||||||||
static_cast<_FP16>(beta) * Z[i]; | ||||||||||||||
else | ||||||||||||||
Z[i] = X[i] / (static_cast<_FP16>(alpha) * Y[i]); | ||||||||||||||
} | ||||||||||||||
#endif | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
_FP16 snrm2(const int N, const _FP16 *X, const int incX) { | ||||||||||||||
|
@@ -904,4 +937,61 @@ void inv_sqrt_inplace(const unsigned int N, float *X) { | |||||||||||||
#endif | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
/**
 * @brief elementwise vector multiplication :
 *        Z = alpha * X * Y (elementwise) + beta * Z
 * @note Forwards to nntrainer::neon::ele_mul when USE_NEON is defined. In the
 * fallback path Z is read only when |beta| > __FLT_MIN__; otherwise Z is
 * overwritten without being read, so stale contents of Z (e.g. NaN) do not
 * leak into the result.
 * @param[in] N length of the vectors
 * @param[in] X float * for Vector X
 * @param[in] Y float * for Vector Y
 * @param[in,out] Z float * for Vector Z
 * @param[in] alpha scalar multiplier for input
 * @param[in] beta scalar multiplier for output
 */
void ele_mul(const unsigned int N, const float *X, const float *Y, float *Z,
             float alpha, float beta) {
#ifdef USE_NEON
  nntrainer::neon::ele_mul(N, X, Y, Z, alpha, beta);
#else
  // The beta test does not depend on the element index: decide once, then run
  // a branch-free loop.
  if (std::abs(beta) > __FLT_MIN__) {
    for (unsigned int i = 0; i < N; ++i)
      Z[i] = alpha * X[i] * Y[i] + beta * Z[i];
  } else {
    for (unsigned int i = 0; i < N; ++i)
      Z[i] = alpha * X[i] * Y[i];
  }
#endif
}
|
||||||||||||||
/**
 * @brief elementwise vector addition : Z = X + alpha * Y + beta * Z
 * @note Forwards to nntrainer::neon::ele_add when USE_NEON is defined. In the
 * fallback path Z is read only when |beta| > __FLT_MIN__; otherwise Z is
 * overwritten without being read, so stale contents of Z (e.g. NaN) do not
 * leak into the result.
 * @param[in] N length of the vectors
 * @param[in] X float * for Vector X
 * @param[in] Y float * for Vector Y
 * @param[in,out] Z float * for Vector Z
 * @param[in] alpha scalar multiplier for input
 * @param[in] beta scalar multiplier for output
 */
void ele_add(const unsigned int N, const float *X, const float *Y, float *Z,
             float alpha, float beta) {
#ifdef USE_NEON
  nntrainer::neon::ele_add(N, X, Y, Z, alpha, beta);
#else
  // The beta test does not depend on the element index: decide once, then run
  // a branch-free loop.
  if (std::abs(beta) > __FLT_MIN__) {
    for (unsigned int i = 0; i < N; ++i)
      Z[i] = X[i] + alpha * Y[i] + beta * Z[i];
  } else {
    for (unsigned int i = 0; i < N; ++i)
      Z[i] = X[i] + alpha * Y[i];
  }
#endif
}
|
||||||||||||||
/**
 * @brief elementwise vector subtraction : Z = X - alpha * Y + beta * Z
 * @note Forwards to nntrainer::neon::ele_sub when USE_NEON is defined. In the
 * fallback path Z is read only when |beta| > __FLT_MIN__; otherwise Z is
 * overwritten without being read, so stale contents of Z (e.g. NaN) do not
 * leak into the result.
 * @param[in] N length of the vectors
 * @param[in] X float * for Vector X
 * @param[in] Y float * for Vector Y
 * @param[in,out] Z float * for Vector Z
 * @param[in] alpha scalar multiplier for input
 * @param[in] beta scalar multiplier for output
 */
void ele_sub(const unsigned int N, const float *X, const float *Y, float *Z,
             float alpha, float beta) {
#ifdef USE_NEON
  nntrainer::neon::ele_sub(N, X, Y, Z, alpha, beta);
#else
  // The beta test does not depend on the element index: decide once, then run
  // a branch-free loop.
  if (std::abs(beta) > __FLT_MIN__) {
    for (unsigned int i = 0; i < N; ++i)
      Z[i] = X[i] - alpha * Y[i] + beta * Z[i];
  } else {
    for (unsigned int i = 0; i < N; ++i)
      Z[i] = X[i] - alpha * Y[i];
  }
#endif
}
|
||||||||||||||
/**
 * @brief elementwise vector division : Z = X / (alpha * Y) + beta * Z
 * @note Forwards to nntrainer::neon::ele_div when USE_NEON is defined. In the
 * fallback path Z is read only when |beta| > __FLT_MIN__; otherwise Z is
 * overwritten without being read, so stale contents of Z (e.g. NaN) do not
 * leak into the result. The divisor alpha * Y[i] is not checked against zero.
 * @param[in] N length of the vectors
 * @param[in] X float * for Vector X
 * @param[in] Y float * for Vector Y
 * @param[in,out] Z float * for Vector Z
 * @param[in] alpha scalar multiplier for input
 * @param[in] beta scalar multiplier for output
 */
void ele_div(const unsigned int N, const float *X, const float *Y, float *Z,
             float alpha, float beta) {
#ifdef USE_NEON
  nntrainer::neon::ele_div(N, X, Y, Z, alpha, beta);
#else
  // The beta test does not depend on the element index: decide once, then run
  // a branch-free loop.
  if (std::abs(beta) > __FLT_MIN__) {
    for (unsigned int i = 0; i < N; ++i)
      Z[i] = X[i] / (alpha * Y[i]) + beta * Z[i];
  } else {
    for (unsigned int i = 0; i < N; ++i)
      Z[i] = X[i] / (alpha * Y[i]);
  }
#endif
}
|
||||||||||||||
} // namespace nntrainer |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -152,22 +152,56 @@ void sgemv(CBLAS_ORDER order, CBLAS_TRANSPOSE TransA, const unsigned int M, | |
const unsigned int lda, const _FP16 *X, const int incX, | ||
const float beta, _FP16 *Y, const int incY); | ||
/** | ||
* @brief elementwise vector multiplication : Z = X ⊙ Y | ||
* @brief elementwise vector multiplication : Z = X ⊙ alpha * Y + | ||
* beta * Z | ||
* @param[in] N length of the vector | ||
* @param[in] X __fp16 * for Vector X | ||
* @param[in] Y __fp16 * for Vector Y | ||
* @param[in] Z __fp16 * for Vector Z | ||
*/ | ||
void ewvm(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z); | ||
* @param[in] X _FP16 * for Vector X | ||
* @param[in] Y _FP16 * for Vector Y | ||
* @param[in] Z _FP16 * for Vector Z | ||
* @param[in] alpha scalar multiplier for input | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. quick question. it seems like scalars are added only for Y and Z. wouldn't there be cases where X also needs a scalar? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also, why is scalar added in the first place? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thanks for clarification :) |
||
* @param[in] beta scalar multiplier for output | ||
*/ | ||
void ele_mul(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z, | ||
float alpha = 1.f, float beta = 0.f); | ||
|
||
/** | ||
* @brief elementwise vector addition : Z = X + Y | ||
* @brief elementwise vector addition : Z = X + alpha * Y + beta * | ||
* Z | ||
* @param[in] N length of the vector | ||
* @param[in] X __fp16 * for Vector X | ||
* @param[in] Y __fp16 * for Vector Y | ||
* @param[in] Z __fp16 * for Vector Z | ||
* @param[in] X _FP16 * for Vector X | ||
* @param[in] Y _FP16 * for Vector Y | ||
* @param[in] Z _FP16 * for Vector Z | ||
* @param[in] alpha scalar multiplier for input | ||
* @param[in] beta scalar multiplier for output | ||
*/ | ||
void ewva(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z); | ||
void ele_add(const unsigned int N, const _FP16 *X, const _FP16 *Y, _FP16 *Z, | ||
float alpha = 1.f, float beta = 0.f); | ||
/** | ||
* @brief elementwise vector subtraction : Z = X - alpha * Y + | ||
* beta * Z | ||
* @param[in] N length of the vector | ||
* @param[in] X _FP16 * for Vector X | ||
* @param[in] Y _FP16 * for Vector Y | ||
* @param[in] Z _FP16 * for Vector Z | ||
* @param[in] alpha scalar multiplier for input | ||
* @param[in] beta scalar multiplier for output | ||
*/ | ||
void ele_sub(const unsigned N, const _FP16 *X, const _FP16 *Y, _FP16 *Z, | ||
float alpha = 1.f, float beta = 0.f); | ||
|
||
/** | ||
* @brief elementwise vector division : Z = X / (alpha * Y) + beta | ||
* * Z | ||
* @note division by zero is not checked: no error is raised when alpha * Y is zero | ||
* @param[in] N length of the vector | ||
* @param[in] X _FP16 * for Vector X | ||
* @param[in] Y _FP16 * for Vector Y | ||
* @param[in] Z _FP16 * for Vector Z | ||
* @param[in] alpha scalar multiplier for input | ||
* @param[in] beta scalar multiplier for output | ||
*/ | ||
void ele_div(const unsigned N, const _FP16 *X, const _FP16 *Y, _FP16 *Z, | ||
float alpha = 1.f, float beta = 0.f); | ||
|
||
/** | ||
* @brief isamax function : index of first maxima | ||
|
@@ -351,8 +385,7 @@ unsigned int isamax(const unsigned int N, const float *X, const int incX); | |
* @param[in] Y float * for Vector Y | ||
* @param[in] alpha float * for scaling angle (radian) | ||
*/ | ||
void sine(const unsigned int N, float *X, float *Y, | ||
float alpha = 1.0); | ||
void sine(const unsigned int N, float *X, float *Y, float alpha = 1.f); | ||
|
||
/** | ||
* @brief cosine with neon: Y = cos(alpha * X) | ||
|
@@ -361,8 +394,7 @@ void sine(const unsigned int N, float *X, float *Y, | |
* @param[in] Y float * for Vector Y | ||
* @param[in] alpha float * for scaling angle (radian) | ||
*/ | ||
void cosine(const unsigned int N, float *X, float *Y, | ||
float alpha = 1.0); | ||
void cosine(const unsigned int N, float *X, float *Y, float alpha = 1.f); | ||
|
||
/** | ||
* @brief inversed squared root transformation inplace : X = 1 / sqrt(X) | ||
|
@@ -371,6 +403,57 @@ void cosine(const unsigned int N, float *X, float *Y, | |
* @param X float * for Vector X | ||
*/ | ||
void inv_sqrt_inplace(const unsigned int N, float *X); | ||
/** | ||
* @brief elementwise vector multiplication : Z = X ⊙ alpha * Y + | ||
* beta * Z | ||
* @param[in] N length of the vector | ||
* @param[in] X float * for Vector X | ||
* @param[in] Y float * for Vector Y | ||
* @param[in] Z float * for Vector Z | ||
* @param[in] alpha scalar multiplier for input | ||
* @param[in] beta scalar multiplier for output | ||
*/ | ||
void ele_mul(const unsigned int N, const float *X, const float *Y, float *Z, | ||
float alpha = 1.f, float beta = 0.f); | ||
|
||
/** | ||
* @brief elementwise vector addition : Z = X + alpha * Y + beta * | ||
* Z | ||
* @param[in] N length of the vector | ||
* @param[in] X float * for Vector X | ||
* @param[in] Y float * for Vector Y | ||
* @param[in] Z float * for Vector Z | ||
* @param[in] alpha scalar multiplier for input | ||
* @param[in] beta scalar multiplier for output | ||
*/ | ||
void ele_add(const unsigned int N, const float *X, const float *Y, float *Z, | ||
float alpha = 1.f, float beta = 0.f); | ||
/** | ||
* @brief elementwise vector subtraction : Z = X - alpha * Y + | ||
* beta * Z | ||
* @param[in] N length of the vector | ||
* @param[in] X float * for Vector X | ||
* @param[in] Y float * for Vector Y | ||
* @param[in] Z float * for Vector Z | ||
* @param[in] alpha scalar multiplier for input | ||
* @param[in] beta scalar multiplier for output | ||
*/ | ||
void ele_sub(const unsigned N, const float *X, const float *Y, float *Z, | ||
float alpha = 1.f, float beta = 0.f); | ||
|
||
/** | ||
* @brief elementwise vector division : Z = X / (alpha * Y) + beta | ||
* * Z | ||
* @note division by zero is not checked: no error is raised when alpha * Y is zero | ||
* @param[in] N length of the vector | ||
* @param[in] X float * for Vector X | ||
* @param[in] Y float * for Vector Y | ||
* @param[in] Z float * for Vector Z | ||
* @param[in] alpha scalar multiplier for input | ||
* @param[in] beta scalar multiplier for output | ||
*/ | ||
void ele_div(const unsigned N, const float *X, const float *Y, float *Z, | ||
float alpha = 1.f, float beta = 0.f); | ||
} /* namespace nntrainer */ | ||
#endif /* __cplusplus */ | ||
#endif /* __BLAS_INTERFACE_H__ */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
one suggestion. how about renaming it into something more clear?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I asked many contributors for their opinions offline, but a shorter function name is generally preferred.
ele_* sounds clear enough for me.