[Blas] copy functionality for signed int16 data type
This pull request adds support for copying the signed int16 data type into fp32.
Please note that this implementation does not utilize SIMD at this time; the NEON path currently falls back to a scalar loop.

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test:   [X]Passed [ ]Failed [ ]Skipped
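
For reference, a minimal usage sketch of the new routine follows. The include path, the nntrainer namespace qualification, and the driver code are assumptions for illustration, not part of this diff.

#include <cstdint>
#include <vector>

#include <blas_interface.h>

int main() {
  std::vector<int16_t> src = {-3, 0, 7, 32767};
  std::vector<float> dst(src.size(), 0.0f);

  // Widen each signed 16-bit element to fp32: Y[i] = (float)X[i].
  nntrainer::copy_s16_fp32(static_cast<unsigned int>(src.size()), src.data(),
                           dst.data());

  // dst now holds {-3.0f, 0.0f, 7.0f, 32767.0f}.
  return 0;
}

Every int16_t value is exactly representable in fp32, so the widening copy is lossless.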

Signed-off-by: Donghyeon Jeong <[email protected]>
djeong20 authored and jijoongmoon committed Jan 17, 2025
1 parent ebae34b commit ee3b1d5
Showing 5 changed files with 37 additions and 0 deletions.
9 changes: 9 additions & 0 deletions nntrainer/tensor/blas_interface.cpp
@@ -979,6 +979,15 @@ void scopy_int8_to_float32(const unsigned int N, const int8_t *X,
}
}

void copy_s16_fp32(const unsigned int N, const int16_t *X, float *Y) {
#ifdef USE_NEON
  nntrainer::neon::copy_s16_fp32(N, X, Y);
#else
  // scalar fallback when NEON is unavailable
  for (unsigned int idx = 0; idx < N; ++idx) {
    Y[idx] = (float)X[idx];
  }
#endif
}

float snrm2(const int N, const float *X, const int incX) {
#ifdef USE_BLAS
#ifdef BLAS_NUM_THREADS
8 changes: 8 additions & 0 deletions nntrainer/tensor/blas_interface.h
@@ -320,6 +320,14 @@ void scopy_int8_to_float32(const unsigned int N, const uint8_t *X,
void scopy_int8_to_float32(const unsigned int N, const int8_t *X,
const int incX, float *Y, const int incY);

/**
* @brief copy function : Y = X
* @param[in] N number of elements in X
* @param[in] X int16_t * for Vector X
* @param[in] Y float * for Vector Y
*/
void copy_s16_fp32(const unsigned int N, const int16_t *X, float *Y);

/**
* @brief sdot computation : sum of all X * Y
* @param[in] N number of elements in Y
8 changes: 8 additions & 0 deletions nntrainer/tensor/blas_neon.cpp
@@ -1597,6 +1597,14 @@ void copy_int8_to_fp32(const unsigned int N, const int8_t *X, float *Y) {
}
}

void copy_s16_fp32(const unsigned int N, const int16_t *X, float *Y) {
  /// @todo vectorize the int16_t to fp32 conversion with NEON intrinsics
  unsigned int idx = 0;
  for (; (N - idx) >= 1; ++idx) {
    Y[idx] = (float)X[idx];
  }
}

void copy_fp16_to_fp32(const unsigned int N, const __fp16 *X, float *Y) {
unsigned int idx = 0;

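The @todo in copy_s16_fp32 above leaves the NEON widening path unimplemented. One possible shape for it, as a sketch only and not part of this commit, assuming AArch64 arm_neon.h intrinsics:

#include <arm_neon.h>

// Sketch: widen 8 int16 lanes per iteration to int32, convert to fp32,
// then finish the remainder with the existing scalar leftover loop.
void copy_s16_fp32(const unsigned int N, const int16_t *X, float *Y) {
  unsigned int idx = 0;
  for (; (N - idx) >= 8; idx += 8) {
    int16x8_t v16 = vld1q_s16(&X[idx]);
    int32x4_t lo = vmovl_s16(vget_low_s16(v16));  // lanes 0..3 -> int32
    int32x4_t hi = vmovl_s16(vget_high_s16(v16)); // lanes 4..7 -> int32
    vst1q_f32(&Y[idx], vcvtq_f32_s32(lo));
    vst1q_f32(&Y[idx + 4], vcvtq_f32_s32(hi));
  }
  for (; (N - idx) >= 1; ++idx) {
    Y[idx] = (float)X[idx];
  }
}

Since vmovl_s16 widens without loss and vcvtq_f32_s32 is exact over the int16 range, such a vector path would match the scalar results bit-for-bit.
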
9 changes: 9 additions & 0 deletions nntrainer/tensor/blas_neon.h
@@ -88,6 +88,15 @@ void copy_int8_or_int4(const unsigned int N, const uint8_t *X, uint8_t *Y);
* @param[in] Y int8_t * for Vector Y
*/
void copy_int8(const unsigned int N, const int8_t *X, int8_t *Y);

/**
* @brief copy function with neon: Y = X
* @param[in] N number of elements in X
* @param[in] X int16_t * for Vector X
* @param[in] Y float * for Vector Y
*/
void copy_s16_fp32(const unsigned int N, const int16_t *X, float *Y);

/**
* @brief sine with neon: Y = sin(alpha * X)
* @param[in] N number of elements in X
3 changes: 3 additions & 0 deletions nntrainer/tensor/float_tensor.cpp
@@ -763,6 +763,9 @@ void FloatTensor::copyData(const Tensor &from) {
throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
break;
case ml::train::TensorDim::DataType::QINT16:
copy_s16_fp32(from.size(), from.getData<int16_t>(), (float *)getData());
break;
case ml::train::TensorDim::DataType::QINT8:
scopy_int8_to_float32(from.size(), from.getData<int8_t>(), 1,
(float *)getData(), 1);
