[ rename/neon ] elementwise_* function to its abbreviation
- Again, the nntrainer::neon namespace differentiates functions that share the same name
- Change to ewvm and ewva (the abbreviations are described in the header file; see the usage sketch below)
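
For readers skimming the diff, here is a minimal usage sketch of the renamed helpers, based on the declarations in blas_neon.h and the guarded call sites in blas_interface.cpp shown below. The `demo` wrapper and its buffers are hypothetical and not part of this commit:

```cpp
// ewvm: element-wise vector multiplication, Z[i] = X[i] * Y[i]
// ewva: element-wise vector addition,       Z[i] = X[i] + Y[i]
// Declarations as they appear in nntrainer/tensor/blas_neon.h after this commit.
namespace nntrainer {
namespace neon {
void ewvm(const unsigned N, const __fp16 *X, const __fp16 *Y, __fp16 *Z);
void ewva(const unsigned N, const __fp16 *X, const __fp16 *Y, __fp16 *Z);
} // namespace neon
} // namespace nntrainer

// Hypothetical caller, guarded the same way as the ewvm_FP16 / ewva_FP16
// wrappers in blas_interface.cpp (__fp16 requires an ARM FP16-capable toolchain).
static void demo(const unsigned int N, const __fp16 *X, const __fp16 *Y,
                 __fp16 *prod, __fp16 *sum) {
#if (defined USE__FP16 && USE_NEON)
  nntrainer::neon::ewvm(N, X, Y, prod); // prod = X * Y on the NEON path
  nntrainer::neon::ewva(N, X, Y, sum);  // sum  = X + Y on the NEON path
#else
  // Scalar fallback, mirroring the #else branches in blas_interface.cpp.
  for (unsigned int i = 0; i < N; ++i) {
    prod[i] = X[i] * Y[i];
    sum[i] = X[i] + Y[i];
  }
#endif
}
```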

**Self evaluation:**
1. Build test:     [X]Passed [ ]Failed [ ]Skipped
2. Run test:     [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: skykongkong8 <[email protected]>
skykongkong8 authored and jijoongmoon committed Feb 14, 2024
1 parent d81a36c commit ebcfbb1
Showing 3 changed files with 15 additions and 19 deletions.
22 changes: 11 additions & 11 deletions nntrainer/tensor/blas_interface.cpp
@@ -33,7 +33,7 @@
} \
} while (0);

-#define hgemv_loop(ci, cj, cM, cN) \
+#define hgemv_loop(ci, cj, cM, cN) \
do { \
float y0; \
unsigned int i, j; \
@@ -45,14 +45,14 @@
} \
} while (0);

-#define haxpy_loop() \
+#define haxpy_loop() \
do { \
unsigned int i; \
for (i = 0; i < N; ++i) \
Y[i * incY] = Y[i * incY] + static_cast<_FP16>(alpha) * X[i * incX]; \
} while (0);

-#define hgemm_loop() \
+#define hgemm_loop() \
do { \
for (unsigned int m = 0; m < M; ++m) { \
for (unsigned int n = 0; n < N; ++n) { \
@@ -171,7 +171,7 @@ static void scopy_FP16(const unsigned int N, const _FP16 *X, const int incX,
}

static void copy_float32_to_float16(const unsigned int N, const float *X,
-const int incX, _FP16 *Y, const int incY) {
+const int incX, _FP16 *Y, const int incY) {
unsigned int incy = abs(incY);
unsigned int incx = abs(incX);

@@ -189,7 +189,7 @@ static void copy_float32_to_float16(const unsigned int N, const float *X,
}

static void copy_float16_to_float32(const unsigned int N, const _FP16 *X,
-const int incX, float *Y, const int incY) {
+const int incX, float *Y, const int incY) {
unsigned int incy = abs(incY);
unsigned int incx = abs(incX);

@@ -207,7 +207,7 @@ static void copy_float16_to_float32(const unsigned int N, const _FP16 *X,
}

static void copy_int4_to_fp16(const unsigned int N, const uint8_t *X,
-const int incX, _FP16 *Y, const int incY) {
+const int incX, _FP16 *Y, const int incY) {
unsigned int incy = abs(incY);
unsigned int incx = abs(incX);

@@ -227,7 +227,7 @@ static void copy_int4_to_fp16(const unsigned int N, const uint8_t *X,
}

static void copy_int8_to_fp16(const unsigned int N, const uint8_t *X,
-const int incX, _FP16 *Y, const int incY) {
+const int incX, _FP16 *Y, const int incY) {
unsigned int incy = abs(incY);
unsigned int incx = abs(incX);

@@ -248,7 +248,7 @@ static void copy_int8_to_fp16(const unsigned int N, const uint8_t *X,
static void ewvm_FP16(const unsigned int N, const _FP16 *X, const _FP16 *Y,
_FP16 *Z) {
#if (defined USE__FP16 && USE_NEON)
-nntrainer::neon::elementwise_vector_multiplication(N, X, Y, Z);
+nntrainer::neon::ewvm(N, X, Y, Z);
#else
for (unsigned int i = 0; i < N; ++i)
Z[i] = X[i] * Y[i];
@@ -258,7 +258,7 @@ static void ewvm_FP16(const unsigned int N, const _FP16 *X, const _FP16 *Y,
static void ewva_FP16(const unsigned int N, const _FP16 *X, const _FP16 *Y,
_FP16 *Z) {
#if (defined USE__FP16 && USE_NEON)
-nntrainer::neon::elementwise_vector_addition(N, X, Y, Z);
+nntrainer::neon::ewva(N, X, Y, Z);
#else
for (unsigned int i = 0; i < N; ++i)
Z[i] = X[i] + Y[i];
@@ -311,8 +311,8 @@ static void sgemm_FP16(CBLAS_ORDER order, CBLAS_TRANSPOSE TransA,
const unsigned int ldc) {

#if (defined USE__FP16 && USE_NEON)
-nntrainer::neon::hgemm(A, B, C, M, N, K, alpha, beta,
-TransA == CblasTrans, TransB == CblasTrans);
+nntrainer::neon::hgemm(A, B, C, M, N, K, alpha, beta, TransA == CblasTrans,
+TransB == CblasTrans);
#else
float *A_ = new float[M * K];
float *B_ = new float[N * K];
6 changes: 2 additions & 4 deletions nntrainer/tensor/blas_neon.cpp
@@ -2014,8 +2014,7 @@ void hgemm_transAB(const __fp16 *A, const __fp16 *B, float *C, uint32_t M,
}
}

-void elementwise_vector_multiplication(const unsigned int N, const __fp16 *X,
-const __fp16 *Y, __fp16 *Z) {
+void ewvm(const unsigned int N, const __fp16 *X, const __fp16 *Y, __fp16 *Z) {
unsigned int i = 0;
for (; N - i >= 8; i += 8) {
float16x8_t x0_7 = vld1q_f16(&X[i]);
@@ -2030,8 +2029,7 @@ void elementwise_vector_multiplication(const unsigned int N, const __fp16 *X,
}
}

-void elementwise_vector_addition(const unsigned int N, const __fp16 *X,
-const __fp16 *Y, __fp16 *Z) {
+void ewva(const unsigned int N, const __fp16 *X, const __fp16 *Y, __fp16 *Z) {
unsigned int i = 0;
for (; N - i >= 8; i += 8) {
float16x8_t x0_7 = vld1q_f16(&X[i]);
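
The loop bodies above are trimmed in this view; for context, here is a self-contained sketch of the 8-wide fp16 NEON pattern they follow (illustrative only, not the repository's exact code; building it requires an ARMv8.2-A FP16-capable toolchain):

```cpp
#include <arm_neon.h> // float16x8_t, vld1q_f16, vmulq_f16, vst1q_f16

// Illustrative element-wise multiply, Z = X * Y, in the style of ewvm:
// process 8 half-precision lanes per iteration, then handle the scalar tail.
static void ewvm_sketch(const unsigned int N, const __fp16 *X, const __fp16 *Y,
                        __fp16 *Z) {
  unsigned int i = 0;
  for (; N - i >= 8; i += 8) {
    float16x8_t x0_7 = vld1q_f16(&X[i]);     // load 8 fp16 values from X
    float16x8_t y0_7 = vld1q_f16(&Y[i]);     // load 8 fp16 values from Y
    vst1q_f16(&Z[i], vmulq_f16(x0_7, y0_7)); // store the 8 products into Z
  }
  for (; i < N; ++i) // remaining 0..7 elements
    Z[i] = X[i] * Y[i];
}
```

An ewva-style kernel would be identical with vaddq_f16 in place of vmulq_f16.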
6 changes: 2 additions & 4 deletions nntrainer/tensor/blas_neon.h
@@ -119,17 +119,15 @@ void hgemv(const __fp16 *A, const __fp16 *X, __fp16 *Y, uint32_t rows,
* @param[in] Y __fp16 * for Vector Y
* @param[in] Z __fp16 * for Vector Z
*/
-void elementwise_vector_multiplication(const unsigned N, const __fp16 *X,
-const __fp16 *Y, __fp16 *Z);
+void ewvm(const unsigned N, const __fp16 *X, const __fp16 *Y, __fp16 *Z);
/**
* @brief elementwise vector addition with neon : Z = X + Y
* @param[in] N length of the vector
* @param[in] X __fp16 * for Vector X
* @param[in] Y __fp16 * for Vector Y
* @param[in] Z __fp16 * for Vector Z
*/
-void elementwise_vector_addition(const unsigned N, const __fp16 *X,
-const __fp16 *Y, __fp16 *Z);
+void ewva(const unsigned N, const __fp16 *X, const __fp16 *Y, __fp16 *Z);

/**
* @brief transposed hgemv computation with neon
