Skip to content

Commit

Permalink
Github action for testing Neon code
Browse files Browse the repository at this point in the history
New github action
Tuning of the tests
Update of test CMakeLists.txt
  • Loading branch information
christophe0606 authored Jan 24, 2025
1 parent fde38c3 commit 04ee0b6
Show file tree
Hide file tree
Showing 59 changed files with 1,978 additions and 1,760 deletions.
118 changes: 118 additions & 0 deletions .github/workflows/runneontest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Builds the Testing framework on an Arm runner, runs the generic, Neon-specific
# and f16 test suites, publishes the HTML reports, and fails the job if any
# report contains a FAILED entry.
name: Neon tests
on:
  workflow_dispatch:
  pull_request:
    branches: [main]
  push:
    branches: [main]

# Declaring any permission sets every unlisted one to "none", so the read
# access that the checkout step relies on is listed explicitly.
permissions:
  actions: read
  contents: read
  security-events: write

jobs:
  CI_test_run:
    runs-on: ubuntu-22.04-arm

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install system packages
        run: |
          # -y on every command: nobody can answer a confirmation prompt on the runner.
          sudo add-apt-repository -y ppa:deadsnakes/ppa
          sudo apt-get install -y libpython3.9 libtinfo5
          sudo apt-get install -y build-essential
          # Register both compilers, then select gcc-12 (g++ follows as a slave link).
          sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 60 --slave /usr/bin/g++ g++ /usr/bin/g++-11
          sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 40 --slave /usr/bin/g++ g++ /usr/bin/g++-12
          sudo update-alternatives --set gcc /usr/bin/gcc-12

      - name: Activate vcpkg
        uses: ARM-software/cmsis-actions/vcpkg@v1
        with:
          config: "./vcpkg-neon-configuration.json"

      - name: Prepare framework
        run: |
          cd Testing
          echo "Create missing folders"
          # -p keeps the step from failing if a folder already exists.
          mkdir -p FullBenchmark Output GeneratedInclude GeneratedSource GeneratedIncludeBench GeneratedSourceBench build
          echo "Install missing python packages"
          pip install -r requirements.txt
          echo "Preprocess test description"
          python preprocess.py -f desc.txt -o Output.pickle
          python preprocess.py -f desc_neon.txt -o Output_neon.pickle
          python preprocess.py -f desc_f16.txt -o Output_f16.pickle
          echo "Generate missing CPP headers"
          python processTests.py -gen . -p Patterns -d Parameters -f Output.pickle -e
          python processTests.py -gen . -p Patterns -d Parameters -f Output_neon.pickle -e
          python processTests.py -gen . -p Patterns -d Parameters -f Output_f16.pickle -e
          cd build
          cmake -G "Ninja" ..

      # Uncomment to open an interactive debug session on the runner.
      # - name: Setup tmate session
      #   uses: mxschmitt/action-tmate@v3

      # Each suite below regenerates the test sources for its own description,
      # rebuilds, runs the test binary and converts the raw output to HTML.
      - name: Execute generic tests
        run: |
          cd Testing/build
          python ../processTests.py -p ../Patterns -d ../Parameters -gen .. -e -f ../Output.pickle
          ninja
          ./test > result.txt
          python ../processResult.py --noerr -e -f ../Output.pickle -r result.txt -html > result.html

      - name: Execute neon specific C tests
        run: |
          cd Testing/build
          python ../processTests.py -p ../Patterns -d ../Parameters -gen .. -e -f ../Output_neon.pickle
          ninja
          ./test > result_neon.txt
          python ../processResult.py --noerr -e -f ../Output_neon.pickle -r result_neon.txt -html > result_neon.html

      - name: Execute f16 C tests
        run: |
          cd Testing/build
          python ../processTests.py -p ../Patterns -d ../Parameters -gen .. -e -f ../Output_f16.pickle
          ninja
          ./test > result_f16.txt
          python ../processResult.py --noerr -e -f ../Output_f16.pickle -r result_f16.txt -html > result_f16.html

      - name: Upload test report
        # Publish whatever reports exist even when an earlier step failed.
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: neon-test-report
          path: |
            Testing/build/result.html
            Testing/build/result_neon.html
            Testing/build/result_f16.html

      - name: Check error
        run: |
          cd Testing/build
          echo "Checking output..."
          # Inspect every report before exiting so all failing suites are listed;
          # a missing report is an error rather than a silent pass.
          status=0
          for report in result.html result_neon.html result_f16.html; do
            if [ ! -f "$report" ]; then
              echo "Missing report: $report"
              status=1
            elif grep -q "FAILED" "$report"; then
              echo "FAILED entries found in $report"
              status=1
            fi
          done
          exit "$status"
89 changes: 49 additions & 40 deletions Include/dsp/matrix_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ extern "C"
\
for(_w=0;_w < nb; _w++) \
{ \
*data *= CAST v; \
*data = CAST *data * CAST v; \
data += _numCols; \
} \
}
Expand Down Expand Up @@ -178,54 +178,63 @@ extern "C"
} \
}

#define SCALE_ROW_F16(A,COL,v,i) \
{ \
#define SCALE_ROW_F16(A,COL,v,i) \
{ \
int32_t _w; \
float16_t *data = (A)->pData; \
float16_t *data = (A)->pData; \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols-(COL); \
\
\
data += i*_numCols + (COL); \
\
for(_w=0;_w < nb; _w++) \
{ \
*data++ *= (_Float16)v; \
} \
\
_Float16 sum; \
for(_w=0;_w < nb; _w++) \
{ \
sum = *data; \
sum *= (_Float16)v; \
*data++ = sum; \
} \
}


#define MAC_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols-(COL); \
\
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
for(_w=0;_w < nb; _w++) \
{ \
*dataA++ += (_Float16)v * (_Float16)*dataB++;\
} \
#define MAC_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols-(COL); \
\
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
_Float16 sum ; \
for(_w=0;_w < nb; _w++) \
{ \
sum = *dataA; \
sum += (_Float16)v * (_Float16)*dataB++;\
*dataA++ = sum; \
} \
}

#define MAS_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols-(COL); \
\
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
for(_w=0;_w < nb; _w++) \
{ \
*dataA++ -= (_Float16)v * (_Float16)*dataB++;\
} \
#define MAS_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols-(COL); \
\
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
_Float16 sum ; \
for(_w=0;_w < nb; _w++) \
{ \
sum = *dataA; \
sum -= (_Float16)v * (_Float16)*dataB++;\
*dataA++ = sum; \
} \
}

#endif /*defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)*/
Expand Down
12 changes: 6 additions & 6 deletions Ne10/CMSIS_NE10_fft.neonintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,17 +110,17 @@

#define VDUPQ_N_F32(VAR) { VAR, VAR, VAR, VAR }

#define CONST_TW_81 0.70710678
#define CONST_TW_81N -0.70710678
#define CONST_TW_81 0.70710678f
#define CONST_TW_81N -0.70710678f

const static float32x4_t Q_TW_81 = VDUPQ_N_F32(CONST_TW_81 );
const static float32x4_t Q_TW_81N = VDUPQ_N_F32(CONST_TW_81N);
static const float32x4_t Q_TW_81 = VDUPQ_N_F32(CONST_TW_81 );
static const float32x4_t Q_TW_81N = VDUPQ_N_F32(CONST_TW_81N);

#define DIV_TW81 1.4142136f
#define DIV_TW81N -1.4142136f

const static float32x4_t DIV_TW81_NEON = VDUPQ_N_F32(DIV_TW81);
const static float32x4_t DIV_TW81N_NEON = VDUPQ_N_F32(DIV_TW81N);
static const float32x4_t DIV_TW81_NEON = VDUPQ_N_F32(DIV_TW81);
static const float32x4_t DIV_TW81N_NEON = VDUPQ_N_F32(DIV_TW81N);

#define NE10_RADIX8x4_R2C_NEON_KERNEL_S1(Q_OUT,Q_IN) do { \
Q_OUT ## 0 = vaddq_f32 (Q_IN ## 0, Q_IN ## 4); \
Expand Down
8 changes: 4 additions & 4 deletions Ne10/CMSIS_NE10_fft.neonintrinsic_f16.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,14 @@
#define CONST_TW_81 0.70710678f16
#define CONST_TW_81N -0.70710678f16

const static float16x4_t Q_TW_81 = VDUPQ_N_F16(CONST_TW_81 );
const static float16x4_t Q_TW_81N = VDUPQ_N_F16(CONST_TW_81N);
static const float16x4_t Q_TW_81 = VDUPQ_N_F16(CONST_TW_81 );
static const float16x4_t Q_TW_81N = VDUPQ_N_F16(CONST_TW_81N);

#define DIV_TW81 1.4142136f16
#define DIV_TW81N -1.4142136f16

const static float16x4_t DIV_TW81_NEON = VDUPQ_N_F16(DIV_TW81);
const static float16x4_t DIV_TW81N_NEON = VDUPQ_N_F16(DIV_TW81N);
static const float16x4_t DIV_TW81_NEON = VDUPQ_N_F16(DIV_TW81);
static const float16x4_t DIV_TW81N_NEON = VDUPQ_N_F16(DIV_TW81N);

#define NE10_RADIX8x4_R2C_NEON_KERNEL_S1(Q_OUT,Q_IN) do { \
Q_OUT ## 0 = vadd_f16 (Q_IN ## 0, Q_IN ## 4); \
Expand Down
40 changes: 20 additions & 20 deletions Ne10/CMSIS_NE10_fft_common_variables.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,60 +40,60 @@
///////////////////////////

/* Twiddles used in Radix-8 FFT */
const static ne10_float32_t TW_81_F32 = 0.70710678; // sqrt (2) / 2
const static ne10_float32_t TW_81N_F32 = -0.70710678; // - TW_81_F32
static const ne10_float32_t TW_81_F32 = 0.70710678f; // sqrt (2) / 2
static const ne10_float32_t TW_81N_F32 = -0.70710678f; // - TW_81_F32

/* Twiddles used in Radix-5 FFT */
const static ne10_fft_cpx_float32_t TW_5A_F32 =
static const ne10_fft_cpx_float32_t TW_5A_F32 =
{
0.309016994374947, // cos (2 * pi / 5)
-0.951056516295154 // - sin (2 * pi / 5)
0.309016994374947f, // cos (2 * pi / 5)
-0.951056516295154f // - sin (2 * pi / 5)
};
const static ne10_fft_cpx_int32_t TW_5A_S32 =
static const ne10_fft_cpx_int32_t TW_5A_S32 =
{
663608942, // round (TW_5A_F32.r * 2^31)
-2042378317 // round (TW_5A_F32.i * 2^31)
};

const static ne10_fft_cpx_float32_t TW_5B_F32 =
static const ne10_fft_cpx_float32_t TW_5B_F32 =
{
-0.809016994374947, // cos (4 * pi / 5)
-0.587785252292473 // - sin (4 * pi / 5)
-0.809016994374947f, // cos (4 * pi / 5)
-0.587785252292473f // - sin (4 * pi / 5)
};
const static ne10_fft_cpx_int32_t TW_5B_S32 =
static const ne10_fft_cpx_int32_t TW_5B_S32 =
{
-1737350766, // round (TW_5B_F32.r * 2^31)
-1262259218 // round (TW_5B_F32.i * 2^31)
};

/* Twiddles used in Radix-3 FFT */
const static ne10_float32_t TW_3I_F32 = 0.866025403784439; // sqrt (3) / 2
const static ne10_float32_t TW_3IN_F32 = - 0.866025403784439; // - TW_3I_F32
const static ne10_int32_t TW_3I_S32 = 1859775393; // round (TW_3I_F32 * 2^31)
const static ne10_int32_t TW_3IN_S32 = -1859775393; // round (TW_3IN_F32 * 2^31)
static const ne10_float32_t TW_3I_F32 = 0.866025403784439f; // sqrt (3) / 2
static const ne10_float32_t TW_3IN_F32 = - 0.866025403784439f; // - TW_3I_F32
static const ne10_int32_t TW_3I_S32 = 1859775393; // round (TW_3I_F32 * 2^31)
static const ne10_int32_t TW_3IN_S32 = -1859775393; // round (TW_3IN_F32 * 2^31)

#if defined(ARM_MATH_NEON_FLOAT16) && defined(ARM_FLOAT16_SUPPORTED)

/* Twiddles used in Radix-8 FFT */
const static ne10_float16_t TW_81_F16 = 0.70710678f16; // sqrt (2) / 2
const static ne10_float16_t TW_81N_F16 = -0.70710678f16; // - TW_81_F16
static const ne10_float16_t TW_81_F16 = 0.70710678f16; // sqrt (2) / 2
static const ne10_float16_t TW_81N_F16 = -0.70710678f16; // - TW_81_F16

/* Twiddles used in Radix-5 FFT */
const static ne10_fft_cpx_float16_t TW_5A_F16 =
static const ne10_fft_cpx_float16_t TW_5A_F16 =
{
0.309016994374947f16, // cos (2 * pi / 5)
-0.951056516295154f16 // - sin (2 * pi / 5)
};

const static ne10_fft_cpx_float16_t TW_5B_F16 =
static const ne10_fft_cpx_float16_t TW_5B_F16 =
{
-0.809016994374947f16, // cos (4 * pi / 5)
-0.587785252292473f16 // - sin (4 * pi / 5)
};

/* Twiddles used in Radix-3 FFT */
const static ne10_float16_t TW_3I_F16 = 0.866025403784439f16; // sqrt (3) / 2
const static ne10_float16_t TW_3IN_F16 = - 0.866025403784439f16; // - TW_3I_F16
static const ne10_float16_t TW_3I_F16 = 0.866025403784439f16; // sqrt (3) / 2
static const ne10_float16_t TW_3IN_F16 = - 0.866025403784439f16; // - TW_3I_F16
#endif

#endif // NE10_FFT_COMMON_VARIBLES_H
Loading

0 comments on commit 04ee0b6

Please sign in to comment.