Skip to content

Commit

Permalink
Enable QD8-F32-QC4W 14x16c8 avx512 VNNI
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 641526280
  • Loading branch information
fbarchard authored and xnnpack-bot committed Jun 8, 2024
1 parent bed314a commit 1a5f28e
Show file tree
Hide file tree
Showing 14 changed files with 538 additions and 621 deletions.
2 changes: 1 addition & 1 deletion cmake/gen/hvx_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ SET(ALL_HVX_MICROKERNEL_SRCS
src/f32-vbinary/gen/f32-vsqrdiff-hvx-u128.c
src/f32-vbinary/gen/f32-vsub-minmax-hvx-u32.c
src/f32-vbinary/gen/f32-vsub-minmax-hvx-u64.c
src/f32-vbinary/gen/f32-vsub-minmax-hvx-u128.c)
src/f32-vbinary/gen/f32-vsub-minmax-hvx-u128.c)
324 changes: 0 additions & 324 deletions src/amalgam/gen/avx512vnni.c

Large diffs are not rendered by default.

694 changes: 466 additions & 228 deletions src/amalgam/gen/avx512vnnigfni.c

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/amalgam/gen/scalar.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <fp16/fp16.h>
#include <assert.h>
#include <fxdiv.h>
#include <math.h>
#include <simd/f32-scalar.h>
Expand Down
8 changes: 4 additions & 4 deletions src/configs/gemm-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -1665,21 +1665,21 @@ static void init_qd8_f32_qc4w_gemm_config(void) {
#endif
if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512vnnigfni) {
qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512vnnigfni_prfm);
qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnnigfni_prfm);
qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(14)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x16c8__avx512vnnigfni_prfm);
qd8_f32_qc4w_gemm_config.init.f32_qc4w = xnn_init_f32_qc4w_minmax_avx512vnni_params;
qd8_f32_qc4w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_qc4w_gemm_gio_w;
qd8_f32_qc4w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_qc4w_gemm_goi_w;
qd8_f32_qc4w_gemm_config.mr = 7;
qd8_f32_qc4w_gemm_config.mr = 14;
qd8_f32_qc4w_gemm_config.nr = 16;
qd8_f32_qc4w_gemm_config.log2_kr = 3;
qd8_f32_qc4w_gemm_config.planes = 2;
} else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512vnni) {
qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512vnni_prfm);
qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnni_prfm);
qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x16c8__avx512vnni_prfm);
qd8_f32_qc4w_gemm_config.init.f32_qc4w = xnn_init_f32_qc4w_minmax_avx512vnni_params;
qd8_f32_qc4w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_qc4w_gemm_gio_w;
qd8_f32_qc4w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_qc4w_gemm_goi_w;
qd8_f32_qc4w_gemm_config.mr = 7;
qd8_f32_qc4w_gemm_config.mr = 14;
qd8_f32_qc4w_gemm_config.nr = 16;
qd8_f32_qc4w_gemm_config.log2_kr = 3;
qd8_f32_qc4w_gemm_config.planes = 2;
Expand Down
3 changes: 1 addition & 2 deletions src/qs8-rdsum/gen/qs8-rdsum-7p7x-minmax-fp32-avx2-c32.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,8 @@ void xnn_qs8_rdsum_ukernel_7p7x__avx2_c32(
// 256 int8s may be summed into an int16 before overflowing
// To prevent handling the tails of the inner 256 loop, we round 256 down to
// the nearest integer multiple of ACCUMULATORS.
int num_batches = floor((rows + 251) / 252);
int r = rows;
for (; num_batches > 0; --num_batches) {
while (r > 0) {
__m256i vacc16_0123456789ABCDEF = _mm256_setzero_si256();
__m256i vacc16_GHIJKLMNOPQRSTUV = _mm256_setzero_si256();
for (int current_batch = min(r, 252); current_batch > 0; current_batch -= 7) {
Expand Down
3 changes: 1 addition & 2 deletions src/qs8-rdsum/gen/qs8-rdsum-7p7x-minmax-fp32-avx2-c64.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,8 @@ void xnn_qs8_rdsum_ukernel_7p7x__avx2_c64(
// 256 int8s may be summed into an int16 before overflowing
// To prevent handling the tails of the inner 256 loop, we round 256 down to
// the nearest integer multiple of ACCUMULATORS.
int num_batches = floor((rows + 251) / 252);
int r = rows;
for (; num_batches > 0; --num_batches) {
while (r > 0) {
__m256i vacc16_0123456789ABCDEF = _mm256_setzero_si256();
__m256i vacc16_GHIJKLMNOPQRSTUV = _mm256_setzero_si256();
__m256i vacc16_WXYZabcderfghijl = _mm256_setzero_si256();
Expand Down
8 changes: 4 additions & 4 deletions src/xnnpack/igemm.h
Original file line number Diff line number Diff line change
Expand Up @@ -415,13 +415,13 @@ DECLARE_F32_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_igemm_minmax_ukernel_1x4v__rvv
DECLARE_F32_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_igemm_minmax_ukernel_7x4v__rvv)

DECLARE_F32_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_igemm_minmax_ukernel_1x32__hvx_broadcast)
DECLARE_F32_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_igemm_minmax_ukernel_8x32__hvx_broadcast)
DECLARE_F32_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_igemm_minmax_ukernel_16x32__hvx_broadcast)
DECLARE_F32_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_igemm_minmax_ukernel_1x64__hvx_broadcast)
DECLARE_F32_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_igemm_minmax_ukernel_4x64__hvx_broadcast)
DECLARE_F32_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_igemm_minmax_ukernel_7x64__hvx_broadcast)
DECLARE_F32_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_igemm_minmax_ukernel_1x128__hvx_broadcast)
DECLARE_F32_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_igemm_minmax_ukernel_2x128__hvx_broadcast)
DECLARE_F32_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_igemm_minmax_ukernel_4x64__hvx_broadcast)
DECLARE_F32_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_igemm_minmax_ukernel_7x64__hvx_broadcast)
DECLARE_F32_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_igemm_minmax_ukernel_8x32__hvx_broadcast)
DECLARE_F32_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_igemm_minmax_ukernel_16x32__hvx_broadcast)

#define DECLARE_F16_IGEMM_MINMAX_UKERNEL_FUNCTION(fn_name) \
XNN_INTERNAL void fn_name( \
Expand Down
6 changes: 3 additions & 3 deletions src/xnnpack/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -450,15 +450,15 @@ DECLARE_F16_F32ACC_RDSUM_UKERNEL_FUNCTION(xnn_f16_f32acc_rdsum_ukernel_7p7x__neo
int32_t* output, \
const union xnn_qs8_rsum_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

DECLARE_QS8_RDSUM_UKERNEL_FUNCTION(xnn_qs8_rdsum_ukernel_7p7x__scalar_c4)
DECLARE_QS8_RDSUM_UKERNEL_FUNCTION(xnn_qs8_rdsum_ukernel_7p7x__avx2_c32)
DECLARE_QS8_RDSUM_UKERNEL_FUNCTION(xnn_qs8_rdsum_ukernel_7p7x__avx2_c64)
DECLARE_QS8_RDSUM_UKERNEL_FUNCTION(xnn_qs8_rdsum_ukernel_7p7x__neon_c16)
DECLARE_QS8_RDSUM_UKERNEL_FUNCTION(xnn_qs8_rdsum_ukernel_7p7x__neon_c32)
DECLARE_QS8_RDSUM_UKERNEL_FUNCTION(xnn_qs8_rdsum_ukernel_7p7x__neon_c64)
DECLARE_QS8_RDSUM_UKERNEL_FUNCTION(xnn_qs8_rdsum_ukernel_7p7x__scalar_c4)
DECLARE_QS8_RDSUM_UKERNEL_FUNCTION(xnn_qs8_rdsum_ukernel_7p7x__sse41_c16)
DECLARE_QS8_RDSUM_UKERNEL_FUNCTION(xnn_qs8_rdsum_ukernel_7p7x__sse41_c32)
DECLARE_QS8_RDSUM_UKERNEL_FUNCTION(xnn_qs8_rdsum_ukernel_7p7x__sse41_c64)
DECLARE_QS8_RDSUM_UKERNEL_FUNCTION(xnn_qs8_rdsum_ukernel_7p7x__avx2_c32)
DECLARE_QS8_RDSUM_UKERNEL_FUNCTION(xnn_qs8_rdsum_ukernel_7p7x__avx2_c64)

#ifdef __cplusplus
} // extern "C"
Expand Down
17 changes: 0 additions & 17 deletions src/xnnpack/vunary.h
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,6 @@ DECLARE_F16_VNEG_UKERNEL_FUNCTION(xnn_f16_vneg_ukernel__sse2_u8)
DECLARE_F16_VNEG_UKERNEL_FUNCTION(xnn_f16_vneg_ukernel__sse2_u16)

#define DECLARE_F32_VNEG_UKERNEL_FUNCTION(fn_name) \
DECLARE_F32_UKERNEL_FUNCTION(fn_name, xnn_f32_default_params);

DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__rvv_u1v)
DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__rvv_u2v)
Expand Down Expand Up @@ -919,7 +918,6 @@ DECLARE_F16_VSQR_UKERNEL_FUNCTION(xnn_f16_vsqr_ukernel__f16c_u8)
DECLARE_F16_VSQR_UKERNEL_FUNCTION(xnn_f16_vsqr_ukernel__f16c_u16)

#define DECLARE_F32_VSQR_UKERNEL_FUNCTION(fn_name) \
DECLARE_F32_UKERNEL_FUNCTION(fn_name, xnn_f32_default_params);

DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__rvv_u1v)
DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__rvv_u2v)
Expand Down Expand Up @@ -1253,11 +1251,8 @@ DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr

DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__scalar_rational_9_6_div_u1)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__scalar_rational_9_6_div_u2)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__scalar_rational_9_6_div_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__scalar_rational_9_6_div_u8)

DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_rational_9_6_div_u4)
Expand Down Expand Up @@ -1300,31 +1295,19 @@ DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512f_rational_9_6_n
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512f_rational_9_6_nr_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512f_rational_9_6_nr_u64)

DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__wasmsimd_rational_9_6_div_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__wasmsimd_rational_9_6_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__wasmsimd_rational_9_6_div_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__wasmsimd_rational_9_6_div_u16)

DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__neon_rational_9_6_div_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__neon_rational_9_6_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__neon_rational_9_6_div_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__neon_rational_9_6_div_u16)

DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__neon_rational_9_6_nr_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__neon_rational_9_6_nr_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__neon_rational_9_6_nr_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(
xnn_f32_vtanh_ukernel__neon_rational_9_6_nr_u16)

#define DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(fn_name) \
Expand Down
59 changes: 37 additions & 22 deletions test/f32-igemm-minmax-2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3180,45 +3180,51 @@ INSTANTIATE_TEST_SUITE_P(
#endif // XNN_ENABLE_RISCV_VECTOR && XNN_ARCH_RISCV


#if XNN_ARCH_HEXAGON
#if XNN_ENABLE_HVX && XNN_ARCH_HEXAGON
INSTANTIATE_TEST_SUITE_P(
F32_IGEMM_MINMAX_8X32__HVX_BROADCAST, GemmTest,
F32_IGEMM_MINMAX_1X128__HVX_BROADCAST, GemmTest,
testing::ValuesIn(CreateTests1(
/*k_block=*/1,
/*adj_k_block=*/1,
/*mr=*/8, /*nr=*/32, /*kr=*/1, /*sr=*/1,
/*mr=*/1, /*nr=*/128, /*kr=*/1, /*sr=*/1,
/*is_igemm=*/true,
[](GemmMicrokernelTester& tester) {
tester.Test(xnn_f32_igemm_minmax_ukernel_8x32__hvx_broadcast,
tester.Test(xnn_f32_igemm_minmax_ukernel_1x128__hvx_broadcast,
xnn_init_f32_minmax_hvx_params,
xnn_pack_f32_conv_goki_w);
},
[]() {
TEST_REQUIRES_HVX;
})),
[](const testing::TestParamInfo<GemmTest::ParamType>& info) {
return info.param.test_name;
});
#endif // XNN_ARCH_HEXAGON
#endif // XNN_ENABLE_HVX && XNN_ARCH_HEXAGON


#if XNN_ARCH_HEXAGON
#if XNN_ENABLE_HVX && XNN_ARCH_HEXAGON
INSTANTIATE_TEST_SUITE_P(
F32_IGEMM_MINMAX_16X32__HVX_BROADCAST, GemmTest,
F32_IGEMM_MINMAX_2X128__HVX_BROADCAST, GemmTest,
testing::ValuesIn(CreateTests1(
/*k_block=*/1,
/*adj_k_block=*/1,
/*mr=*/16, /*nr=*/32, /*kr=*/1, /*sr=*/1,
/*mr=*/2, /*nr=*/128, /*kr=*/1, /*sr=*/1,
/*is_igemm=*/true,
[](GemmMicrokernelTester& tester) {
tester.Test(xnn_f32_igemm_minmax_ukernel_16x32__hvx_broadcast,
tester.Test(xnn_f32_igemm_minmax_ukernel_2x128__hvx_broadcast,
xnn_init_f32_minmax_hvx_params,
xnn_pack_f32_conv_goki_w);
},
[]() {
TEST_REQUIRES_HVX;
})),
[](const testing::TestParamInfo<GemmTest::ParamType>& info) {
return info.param.test_name;
});
#endif // XNN_ARCH_HEXAGON
#endif // XNN_ENABLE_HVX && XNN_ARCH_HEXAGON


#if XNN_ARCH_HEXAGON
#if XNN_ENABLE_HVX && XNN_ARCH_HEXAGON
INSTANTIATE_TEST_SUITE_P(
F32_IGEMM_MINMAX_7X64__HVX_BROADCAST, GemmTest,
testing::ValuesIn(CreateTests1(
Expand All @@ -3230,46 +3236,55 @@ INSTANTIATE_TEST_SUITE_P(
tester.Test(xnn_f32_igemm_minmax_ukernel_7x64__hvx_broadcast,
xnn_init_f32_minmax_hvx_params,
xnn_pack_f32_conv_goki_w);
},
[]() {
TEST_REQUIRES_HVX;
})),
[](const testing::TestParamInfo<GemmTest::ParamType>& info) {
return info.param.test_name;
});
#endif // XNN_ARCH_HEXAGON
#endif // XNN_ENABLE_HVX && XNN_ARCH_HEXAGON


#if XNN_ARCH_HEXAGON
#if XNN_ENABLE_HVX && XNN_ARCH_HEXAGON
INSTANTIATE_TEST_SUITE_P(
F32_IGEMM_MINMAX_1X128__HVX_BROADCAST, GemmTest,
F32_IGEMM_MINMAX_8X32__HVX_BROADCAST, GemmTest,
testing::ValuesIn(CreateTests1(
/*k_block=*/1,
/*adj_k_block=*/1,
/*mr=*/1, /*nr=*/128, /*kr=*/1, /*sr=*/1,
/*mr=*/8, /*nr=*/32, /*kr=*/1, /*sr=*/1,
/*is_igemm=*/true,
[](GemmMicrokernelTester& tester) {
tester.Test(xnn_f32_igemm_minmax_ukernel_1x128__hvx_broadcast,
tester.Test(xnn_f32_igemm_minmax_ukernel_8x32__hvx_broadcast,
xnn_init_f32_minmax_hvx_params,
xnn_pack_f32_conv_goki_w);
},
[]() {
TEST_REQUIRES_HVX;
})),
[](const testing::TestParamInfo<GemmTest::ParamType>& info) {
return info.param.test_name;
});
#endif // XNN_ARCH_HEXAGON
#endif // XNN_ENABLE_HVX && XNN_ARCH_HEXAGON


#if XNN_ARCH_HEXAGON
#if XNN_ENABLE_HVX && XNN_ARCH_HEXAGON
INSTANTIATE_TEST_SUITE_P(
F32_IGEMM_MINMAX_2X128__HVX_BROADCAST, GemmTest,
F32_IGEMM_MINMAX_16X32__HVX_BROADCAST, GemmTest,
testing::ValuesIn(CreateTests1(
/*k_block=*/1,
/*adj_k_block=*/1,
/*mr=*/2, /*nr=*/128, /*kr=*/1, /*sr=*/1,
/*mr=*/16, /*nr=*/32, /*kr=*/1, /*sr=*/1,
/*is_igemm=*/true,
[](GemmMicrokernelTester& tester) {
tester.Test(xnn_f32_igemm_minmax_ukernel_2x128__hvx_broadcast,
tester.Test(xnn_f32_igemm_minmax_ukernel_16x32__hvx_broadcast,
xnn_init_f32_minmax_hvx_params,
xnn_pack_f32_conv_goki_w);
},
[]() {
TEST_REQUIRES_HVX;
})),
[](const testing::TestParamInfo<GemmTest::ParamType>& info) {
return info.param.test_name;
});
#endif // XNN_ARCH_HEXAGON
#endif // XNN_ENABLE_HVX && XNN_ARCH_HEXAGON
21 changes: 15 additions & 6 deletions test/f32-igemm-minmax.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2894,7 +2894,7 @@ INSTANTIATE_TEST_SUITE_P(
});


#if XNN_ARCH_HEXAGON
#if XNN_ENABLE_HVX && XNN_ARCH_HEXAGON
INSTANTIATE_TEST_SUITE_P(
F32_IGEMM_MINMAX_1X32__HVX_BROADCAST, GemmTest,
testing::ValuesIn(CreateTests1(
Expand All @@ -2906,14 +2906,17 @@ INSTANTIATE_TEST_SUITE_P(
tester.Test(xnn_f32_igemm_minmax_ukernel_1x32__hvx_broadcast,
xnn_init_f32_minmax_hvx_params,
xnn_pack_f32_conv_goki_w);
},
[]() {
TEST_REQUIRES_HVX;
})),
[](const testing::TestParamInfo<GemmTest::ParamType>& info) {
return info.param.test_name;
});
#endif // XNN_ARCH_HEXAGON
#endif // XNN_ENABLE_HVX && XNN_ARCH_HEXAGON


#if XNN_ARCH_HEXAGON
#if XNN_ENABLE_HVX && XNN_ARCH_HEXAGON
INSTANTIATE_TEST_SUITE_P(
F32_IGEMM_MINMAX_1X64__HVX_BROADCAST, GemmTest,
testing::ValuesIn(CreateTests1(
Expand All @@ -2925,14 +2928,17 @@ INSTANTIATE_TEST_SUITE_P(
tester.Test(xnn_f32_igemm_minmax_ukernel_1x64__hvx_broadcast,
xnn_init_f32_minmax_hvx_params,
xnn_pack_f32_conv_goki_w);
},
[]() {
TEST_REQUIRES_HVX;
})),
[](const testing::TestParamInfo<GemmTest::ParamType>& info) {
return info.param.test_name;
});
#endif // XNN_ARCH_HEXAGON
#endif // XNN_ENABLE_HVX && XNN_ARCH_HEXAGON


#if XNN_ARCH_HEXAGON
#if XNN_ENABLE_HVX && XNN_ARCH_HEXAGON
INSTANTIATE_TEST_SUITE_P(
F32_IGEMM_MINMAX_4X64__HVX_BROADCAST, GemmTest,
testing::ValuesIn(CreateTests1(
Expand All @@ -2944,8 +2950,11 @@ INSTANTIATE_TEST_SUITE_P(
tester.Test(xnn_f32_igemm_minmax_ukernel_4x64__hvx_broadcast,
xnn_init_f32_minmax_hvx_params,
xnn_pack_f32_conv_goki_w);
},
[]() {
TEST_REQUIRES_HVX;
})),
[](const testing::TestParamInfo<GemmTest::ParamType>& info) {
return info.param.test_name;
});
#endif // XNN_ARCH_HEXAGON
#endif // XNN_ENABLE_HVX && XNN_ARCH_HEXAGON
10 changes: 5 additions & 5 deletions test/f32-igemm-minmax.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -883,15 +883,15 @@
init: xnn_init_f32_minmax_hvx_params
pack: xnn_pack_f32_conv_goki_w
k-block: 1
- name: xnn_f32_igemm_minmax_ukernel_8x32__hvx_broadcast
- name: xnn_f32_igemm_minmax_ukernel_1x64__hvx_broadcast
init: xnn_init_f32_minmax_hvx_params
pack: xnn_pack_f32_conv_goki_w
k-block: 1
- name: xnn_f32_igemm_minmax_ukernel_16x32__hvx_broadcast
- name: xnn_f32_igemm_minmax_ukernel_1x128__hvx_broadcast
init: xnn_init_f32_minmax_hvx_params
pack: xnn_pack_f32_conv_goki_w
k-block: 1
- name: xnn_f32_igemm_minmax_ukernel_1x64__hvx_broadcast
- name: xnn_f32_igemm_minmax_ukernel_2x128__hvx_broadcast
init: xnn_init_f32_minmax_hvx_params
pack: xnn_pack_f32_conv_goki_w
k-block: 1
Expand All @@ -903,11 +903,11 @@
init: xnn_init_f32_minmax_hvx_params
pack: xnn_pack_f32_conv_goki_w
k-block: 1
- name: xnn_f32_igemm_minmax_ukernel_1x128__hvx_broadcast
- name: xnn_f32_igemm_minmax_ukernel_8x32__hvx_broadcast
init: xnn_init_f32_minmax_hvx_params
pack: xnn_pack_f32_conv_goki_w
k-block: 1
- name: xnn_f32_igemm_minmax_ukernel_2x128__hvx_broadcast
- name: xnn_f32_igemm_minmax_ukernel_16x32__hvx_broadcast
init: xnn_init_f32_minmax_hvx_params
pack: xnn_pack_f32_conv_goki_w
k-block: 1
2 changes: 0 additions & 2 deletions test/f32-vtanh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -240,5 +240,3 @@
- name: xnn_f32_vtanh_ukernel__neon_rational_9_6_nr_u8
- name: xnn_f32_vtanh_ukernel__neon_rational_9_6_nr_u12
- name: xnn_f32_vtanh_ukernel__neon_rational_9_6_nr_u16


0 comments on commit 1a5f28e

Please sign in to comment.