Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Work on Neon tests for future github action #185

Merged
merged 23 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
f402c4c
Work on Neon tests for future github action
christophe0606 Jun 24, 2024
080065a
Github action to test the Neon code.
christophe0606 Jun 24, 2024
e7cb117
Install gcc in github action for Neon testing.
christophe0606 Jun 24, 2024
5a3d712
Debug github action with SSH.
christophe0606 Jun 24, 2024
112980d
Debug github action 2
christophe0606 Jun 24, 2024
0a2b90f
Removed one step in neon gh action.
christophe0606 Jun 24, 2024
fddda20
Removed SSH step in github action.
christophe0606 Jun 24, 2024
c34d887
Enabled ubuntu arm in github action for the neon tests
christophe0606 Jan 21, 2025
04d08cf
Debug ghactions for Neon
christophe0606 Jan 21, 2025
68d8271
Update gcc version in github action for neon tests
christophe0606 Jan 21, 2025
f9773e3
Improved cmake to build tests on Neon with gcc and clang.
christophe0606 Jan 22, 2025
662f0b4
Correct neon gh action
christophe0606 Jan 21, 2025
f6ba3eb
Correct build issue with gcc and neon version
christophe0606 Jan 22, 2025
28f63d2
Enable ninja build for Neon tests in github action
christophe0606 Jan 22, 2025
0d4c65b
Add Neon specific tests to the github action
christophe0606 Jan 22, 2025
398f221
gh action debug
christophe0606 Jan 22, 2025
c40eb28
Use make instead of ninja in Neon github action.
christophe0606 Jan 22, 2025
17950e4
Debug neon gh action
christophe0606 Jan 23, 2025
a9dda51
Corrected python script for Neon testing in gh action
christophe0606 Jan 23, 2025
f8b0e25
Update to biquad df2t f32 tests since the Neon version is disabled be…
christophe0606 Jan 23, 2025
83024f9
Re-enable pack and doc gh-actions
christophe0606 Jan 23, 2025
c16687a
Rename Neon gh action
christophe0606 Jan 23, 2025
391d684
Tuned test threshold
christophe0606 Jan 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions .github/workflows/runneontest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Builds the CMSIS-DSP test framework on an Arm64 Linux runner and runs the
# generic, Neon-specific and f16 test suites, then publishes the HTML reports.
name: Neon tests
on:
  workflow_dispatch:
  pull_request:
    branches: [main]
  push:
    branches: [main]

permissions:
  actions: read
  security-events: write

jobs:
  CI_test_run:
    runs-on: ubuntu-22.04-arm

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      # All package commands pass -y: the runner has no terminal, so any
      # confirmation prompt would otherwise abort the step.
      - name: Install system packages
        run: |
          sudo add-apt-repository -y ppa:deadsnakes/ppa
          sudo apt-get install -y libpython3.9 libtinfo5
          sudo apt-get install -y build-essential
          sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 60 --slave /usr/bin/g++ g++ /usr/bin/g++-11
          sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 40 --slave /usr/bin/g++ g++ /usr/bin/g++-12
          sudo update-alternatives --set gcc /usr/bin/gcc-12

      - name: Activate vcpkg
        uses: ARM-software/cmsis-actions/vcpkg@v1
        with:
          config: "./vcpkg-neon-configuration.json"

      - name: Prepare framework
        run: |
          cd Testing
          echo "Create missing folders"
          # -p keeps the step from failing if a folder already exists.
          mkdir -p FullBenchmark
          mkdir -p Output
          mkdir -p GeneratedInclude
          mkdir -p GeneratedSource
          mkdir -p GeneratedIncludeBench
          mkdir -p GeneratedSourceBench
          mkdir -p build

          echo "Install missing python packages"
          pip install -r requirements.txt

          echo "Preprocess test description"
          python preprocess.py -f desc.txt -o Output.pickle
          python preprocess.py -f desc_neon.txt -o Output_neon.pickle
          python preprocess.py -f desc_f16.txt -o Output_f16.pickle

          echo "Generate missing CPP headers"
          python processTests.py -gen . -p Patterns -d Parameters -f Output.pickle -e
          python processTests.py -gen . -p Patterns -d Parameters -f Output_neon.pickle -e
          python processTests.py -gen . -p Patterns -d Parameters -f Output_f16.pickle -e

          cd build

          cmake -G "Ninja" ..

      # - name: Setup tmate session
      #   uses: mxschmitt/action-tmate@v3

      # Each suite regenerates the test sources from its own pickle, rebuilds
      # the single ./test binary and converts its output to an HTML report.
      - name: Execute generic tests
        run: |
          cd Testing/build

          python ../processTests.py -p ../Patterns -d ../Parameters -gen .. -e -f ../Output.pickle
          ninja
          ./test > result.txt
          python ../processResult.py --noerr -e -f ../Output.pickle -r result.txt -html > result.html

      - name: Execute neon specific C tests
        run: |
          cd Testing/build

          python ../processTests.py -p ../Patterns -d ../Parameters -gen .. -e -f ../Output_neon.pickle
          ninja
          ./test > result_neon.txt
          python ../processResult.py --noerr -e -f ../Output_neon.pickle -r result_neon.txt -html > result_neon.html

      - name: Execute f16 C tests
        run: |
          cd Testing/build

          python ../processTests.py -p ../Patterns -d ../Parameters -gen .. -e -f ../Output_f16.pickle
          ninja
          ./test > result_f16.txt
          python ../processResult.py --noerr -e -f ../Output_f16.pickle -r result_f16.txt -html > result_f16.html

      - name: Upload test report
        uses: actions/upload-artifact@v4
        with:
          name: neon-test-report
          path: |
            Testing/build/result.html
            Testing/build/result_neon.html
            Testing/build/result_f16.html

      # The reports are uploaded first so they are available even when this
      # final gate fails the job because a report contains "FAILED".
      - name: Check error
        run: |
          cd Testing/build

          echo "Checking output..."
          test "$(grep "FAILED" result.html | wc -l)" -eq 0
          test "$(grep "FAILED" result_neon.html | wc -l)" -eq 0
          test "$(grep "FAILED" result_f16.html | wc -l)" -eq 0

89 changes: 49 additions & 40 deletions Include/dsp/matrix_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ extern "C"
\
for(_w=0;_w < nb; _w++) \
{ \
*data *= CAST v; \
*data = CAST *data * CAST v; \
data += _numCols; \
} \
}
Expand Down Expand Up @@ -178,54 +178,63 @@ extern "C"
} \
}

/*
 * SCALE_ROW_F16(A,COL,v,i)
 *
 * Multiplies, in place, the elements of row i of matrix A by v (cast to
 * _Float16), starting at column COL and running to the last column.
 * A is accessed through (A)->pData and (A)->numCols.
 */
#define SCALE_ROW_F16(A,COL,v,i) \
{ \
#define SCALE_ROW_F16(A,COL,v,i) \
{ \
int32_t _w; \
float16_t *data = (A)->pData; \
float16_t *data = (A)->pData; \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols-(COL); \
\
\
data += i*_numCols + (COL); \
\
for(_w=0;_w < nb; _w++) \
{ \
*data++ *= (_Float16)v; \
} \
\
_Float16 sum; \
for(_w=0;_w < nb; _w++) \
{ \
sum = *data; \
sum *= (_Float16)v; \
*data++ = sum; \
} \
}


/*
 * MAC_ROW_F16(COL,A,i,v,B,j)
 *
 * Multiply-accumulate on rows: for every column from COL to the last column,
 * adds v * B[j][col] to A[i][col], in place in A.
 * Both row offsets are computed with (A)->numCols, so B is indexed with the
 * column count of A.
 */
#define MAC_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols-(COL); \
\
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
for(_w=0;_w < nb; _w++) \
{ \
*dataA++ += (_Float16)v * (_Float16)*dataB++;\
} \
#define MAC_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols-(COL); \
\
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
_Float16 sum ; \
for(_w=0;_w < nb; _w++) \
{ \
sum = *dataA; \
sum += (_Float16)v * (_Float16)*dataB++;\
*dataA++ = sum; \
} \
}

/*
 * MAS_ROW_F16(COL,A,i,v,B,j)
 *
 * Multiply-subtract on rows: for every column from COL to the last column,
 * subtracts v * B[j][col] from A[i][col], in place in A.
 * Both row offsets are computed with (A)->numCols, so B is indexed with the
 * column count of A.
 */
#define MAS_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols-(COL); \
\
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
for(_w=0;_w < nb; _w++) \
{ \
*dataA++ -= (_Float16)v * (_Float16)*dataB++;\
} \
#define MAS_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols-(COL); \
\
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
_Float16 sum ; \
for(_w=0;_w < nb; _w++) \
{ \
sum = *dataA; \
sum -= (_Float16)v * (_Float16)*dataB++;\
*dataA++ = sum; \
} \
}

#endif /*defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)*/
Expand Down
12 changes: 6 additions & 6 deletions Ne10/CMSIS_NE10_fft.neonintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,17 +110,17 @@

/* Brace initializer that repeats VAR in all four lanes of a float32x4_t. */
#define VDUPQ_N_F32(VAR) { VAR, VAR, VAR, VAR }

/* +/- 0.70710678, i.e. approximately sqrt(2) / 2. */
#define CONST_TW_81 0.70710678
#define CONST_TW_81N -0.70710678
#define CONST_TW_81 0.70710678f
#define CONST_TW_81N -0.70710678f

/* Four-lane vectors holding the constants above. */
const static float32x4_t Q_TW_81 = VDUPQ_N_F32(CONST_TW_81 );
const static float32x4_t Q_TW_81N = VDUPQ_N_F32(CONST_TW_81N);
static const float32x4_t Q_TW_81 = VDUPQ_N_F32(CONST_TW_81 );
static const float32x4_t Q_TW_81N = VDUPQ_N_F32(CONST_TW_81N);

/* +/- 1.4142136, i.e. approximately sqrt(2) (the reciprocal of CONST_TW_81). */
#define DIV_TW81 1.4142136f
#define DIV_TW81N -1.4142136f

/* Four-lane vectors holding DIV_TW81 and DIV_TW81N. */
const static float32x4_t DIV_TW81_NEON = VDUPQ_N_F32(DIV_TW81);
const static float32x4_t DIV_TW81N_NEON = VDUPQ_N_F32(DIV_TW81N);
static const float32x4_t DIV_TW81_NEON = VDUPQ_N_F32(DIV_TW81);
static const float32x4_t DIV_TW81N_NEON = VDUPQ_N_F32(DIV_TW81N);

#define NE10_RADIX8x4_R2C_NEON_KERNEL_S1(Q_OUT,Q_IN) do { \
Q_OUT ## 0 = vaddq_f32 (Q_IN ## 0, Q_IN ## 4); \
Expand Down
8 changes: 4 additions & 4 deletions Ne10/CMSIS_NE10_fft.neonintrinsic_f16.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,14 @@
/* +/- 0.70710678 as f16-suffixed literals, i.e. approximately sqrt(2) / 2. */
#define CONST_TW_81 0.70710678f16
#define CONST_TW_81N -0.70710678f16

/* float16x4_t vectors initialised from the constants above through
   VDUPQ_N_F16 (defined outside this view). */
const static float16x4_t Q_TW_81 = VDUPQ_N_F16(CONST_TW_81 );
const static float16x4_t Q_TW_81N = VDUPQ_N_F16(CONST_TW_81N);
static const float16x4_t Q_TW_81 = VDUPQ_N_F16(CONST_TW_81 );
static const float16x4_t Q_TW_81N = VDUPQ_N_F16(CONST_TW_81N);

/* +/- 1.4142136 as f16-suffixed literals, i.e. approximately sqrt(2). */
#define DIV_TW81 1.4142136f16
#define DIV_TW81N -1.4142136f16

/* float16x4_t vectors holding DIV_TW81 and DIV_TW81N. */
const static float16x4_t DIV_TW81_NEON = VDUPQ_N_F16(DIV_TW81);
const static float16x4_t DIV_TW81N_NEON = VDUPQ_N_F16(DIV_TW81N);
static const float16x4_t DIV_TW81_NEON = VDUPQ_N_F16(DIV_TW81);
static const float16x4_t DIV_TW81N_NEON = VDUPQ_N_F16(DIV_TW81N);

#define NE10_RADIX8x4_R2C_NEON_KERNEL_S1(Q_OUT,Q_IN) do { \
Q_OUT ## 0 = vadd_f16 (Q_IN ## 0, Q_IN ## 4); \
Expand Down
40 changes: 20 additions & 20 deletions Ne10/CMSIS_NE10_fft_common_variables.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,60 +40,60 @@
///////////////////////////

/* Twiddles used in Radix-8 FFT */
const static ne10_float32_t TW_81_F32 = 0.70710678; // sqrt (2) / 2
const static ne10_float32_t TW_81N_F32 = -0.70710678; // - TW_81_F32
static const ne10_float32_t TW_81_F32 = 0.70710678f; // sqrt (2) / 2
static const ne10_float32_t TW_81N_F32 = -0.70710678f; // - TW_81_F32

/* Twiddles used in Radix-5 FFT */
const static ne10_fft_cpx_float32_t TW_5A_F32 =
static const ne10_fft_cpx_float32_t TW_5A_F32 =
{
0.309016994374947, // cos (2 * pi / 5)
-0.951056516295154 // - sin (2 * pi / 5)
0.309016994374947f, // cos (2 * pi / 5)
-0.951056516295154f // - sin (2 * pi / 5)
};
const static ne10_fft_cpx_int32_t TW_5A_S32 =
static const ne10_fft_cpx_int32_t TW_5A_S32 =
{
663608942, // round (TW_5A_F32.r * 2^31)
-2042378317 // round (TW_5A_F32.i * 2^31)
};

const static ne10_fft_cpx_float32_t TW_5B_F32 =
static const ne10_fft_cpx_float32_t TW_5B_F32 =
{
-0.809016994374947, // cos (4 * pi / 5)
-0.587785252292473 // - sin (4 * pi / 5)
-0.809016994374947f, // cos (4 * pi / 5)
-0.587785252292473f // - sin (4 * pi / 5)
};
const static ne10_fft_cpx_int32_t TW_5B_S32 =
static const ne10_fft_cpx_int32_t TW_5B_S32 =
{
-1737350766, // round (TW_5B_F32.r * 2^31)
-1262259218 // round (TW_5B_F32.i * 2^31)
};

/* Twiddles used in Radix-3 FFT */
const static ne10_float32_t TW_3I_F32 = 0.866025403784439; // sqrt (3) / 2
const static ne10_float32_t TW_3IN_F32 = - 0.866025403784439; // - TW_3I_F32
const static ne10_int32_t TW_3I_S32 = 1859775393; // round (TW_3I_F32 * 2^31)
const static ne10_int32_t TW_3IN_S32 = -1859775393; // round (TW_3IN_F32 * 2^31)
static const ne10_float32_t TW_3I_F32 = 0.866025403784439f; // sqrt (3) / 2
static const ne10_float32_t TW_3IN_F32 = - 0.866025403784439f; // - TW_3I_F32
static const ne10_int32_t TW_3I_S32 = 1859775393; // round (TW_3I_F32 * 2^31)
static const ne10_int32_t TW_3IN_S32 = -1859775393; // round (TW_3IN_F32 * 2^31)

#if defined(ARM_MATH_NEON_FLOAT16) && defined(ARM_FLOAT16_SUPPORTED)

/* Twiddles used in Radix-8 FFT */
const static ne10_float16_t TW_81_F16 = 0.70710678f16; // sqrt (2) / 2
const static ne10_float16_t TW_81N_F16 = -0.70710678f16; // - TW_81_F16
static const ne10_float16_t TW_81_F16 = 0.70710678f16; // sqrt (2) / 2
static const ne10_float16_t TW_81N_F16 = -0.70710678f16; // - TW_81_F16

/* Twiddles used in Radix-5 FFT */
const static ne10_fft_cpx_float16_t TW_5A_F16 =
static const ne10_fft_cpx_float16_t TW_5A_F16 =
{
0.309016994374947f16, // cos (2 * pi / 5)
-0.951056516295154f16 // - sin (2 * pi / 5)
};

const static ne10_fft_cpx_float16_t TW_5B_F16 =
static const ne10_fft_cpx_float16_t TW_5B_F16 =
{
-0.809016994374947f16, // cos (4 * pi / 5)
-0.587785252292473f16 // - sin (4 * pi / 5)
};

/* Twiddles used in Radix-3 FFT */
const static ne10_float16_t TW_3I_F16 = 0.866025403784439f16; // sqrt (3) / 2
const static ne10_float16_t TW_3IN_F16 = - 0.866025403784439f16; // - TW_3I_F16
static const ne10_float16_t TW_3I_F16 = 0.866025403784439f16; // sqrt (3) / 2
static const ne10_float16_t TW_3IN_F16 = - 0.866025403784439f16; // - TW_3I_F16
#endif

#endif // NE10_FFT_COMMON_VARIBLES_H
Loading