Skip to content

Commit

Permalink
MRx2 GEMM/IGEMM fma3 variant
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 719127729
  • Loading branch information
fbarchard authored and xnnpack-bot committed Jan 24, 2025
1 parent a108468 commit 025e3b6
Show file tree
Hide file tree
Showing 29 changed files with 1,534 additions and 156 deletions.
44 changes: 33 additions & 11 deletions bench/f32-gemm-minmax.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3421,6 +3421,17 @@

BENCHMARK_GEMM(f32_gemm_minmax_ukernel_4x2c4__sse)

// Benchmark wrapper: forwards `state` to GEMMBenchmark together with
// xnn_f32_gemm_minmax_ukernel_6x2c4__sse, the scalar min/max params
// initializer and the GOI weight-packing function, using tile arguments
// mr=6, nr=2, kr=4, sr=1. nullptr is passed as the ISA check.
// `net` is not referenced in this body.
static void f32_gemm_minmax_ukernel_6x2c4__sse(benchmark::State& state,
                                               const char* net) {
  GEMMBenchmark(
      state,
      xnn_f32_gemm_minmax_ukernel_6x2c4__sse,
      xnn_init_f32_minmax_scalar_params,
      xnn_pack_f32_gemm_goi_w,
      /*mr=*/6,
      /*nr=*/2,
      /*kr=*/4,
      /*sr=*/1,
      /*isa_check=*/nullptr);
}

// Hands the wrapper above to the BENCHMARK_GEMM macro (defined elsewhere;
// presumably registers it with the benchmark framework -- confirm).
BENCHMARK_GEMM(f32_gemm_minmax_ukernel_6x2c4__sse)

static void f32_gemm_minmax_ukernel_4x8__sse_dup(benchmark::State& state, const char* net) {
GEMMBenchmark(state,
xnn_f32_gemm_minmax_ukernel_4x8__sse_dup,
Expand Down Expand Up @@ -3487,17 +3498,6 @@

BENCHMARK_GEMM(f32_gemm_minmax_ukernel_5x8s4__sse)

// Benchmark wrapper: forwards `state` to GEMMBenchmark together with
// xnn_f32_gemm_minmax_ukernel_6x2c4__sse, the scalar min/max params
// initializer and the GOI weight-packing function, using tile arguments
// mr=6, nr=2, kr=4, sr=1. nullptr is passed as the ISA check.
// `net` is not referenced in this body.
static void f32_gemm_minmax_ukernel_6x2c4__sse(benchmark::State& state,
                                               const char* net) {
  GEMMBenchmark(
      state,
      xnn_f32_gemm_minmax_ukernel_6x2c4__sse,
      xnn_init_f32_minmax_scalar_params,
      xnn_pack_f32_gemm_goi_w,
      /*mr=*/6,
      /*nr=*/2,
      /*kr=*/4,
      /*sr=*/1,
      /*isa_check=*/nullptr);
}

// Hands the wrapper above to the BENCHMARK_GEMM macro (defined elsewhere;
// presumably registers it with the benchmark framework -- confirm).
BENCHMARK_GEMM(f32_gemm_minmax_ukernel_6x2c4__sse)

static void f32_gemm_minmax_ukernel_6x8__sse_dup(benchmark::State& state, const char* net) {
GEMMBenchmark(state,
xnn_f32_gemm_minmax_ukernel_6x8__sse_dup,
Expand Down Expand Up @@ -3531,6 +3531,28 @@

BENCHMARK_GEMM(f32_gemm_minmax_ukernel_6x8s4__sse)

// Benchmark wrapper: forwards `state` to GEMMBenchmark together with
// xnn_f32_gemm_minmax_ukernel_4x2c4__fma3, the scalar min/max params
// initializer and the GOI weight-packing function, using tile arguments
// mr=4, nr=2, kr=4, sr=1. benchmark::utils::CheckFMA3 is supplied in the
// final (ISA check) argument position.
// `net` is not referenced in this body.
static void f32_gemm_minmax_ukernel_4x2c4__fma3(benchmark::State& state,
                                                const char* net) {
  GEMMBenchmark(
      state,
      xnn_f32_gemm_minmax_ukernel_4x2c4__fma3,
      xnn_init_f32_minmax_scalar_params,
      xnn_pack_f32_gemm_goi_w,
      /*mr=*/4,
      /*nr=*/2,
      /*kr=*/4,
      /*sr=*/1,
      /*isa_check=*/benchmark::utils::CheckFMA3);
}

// Hands the wrapper above to the BENCHMARK_GEMM macro (defined elsewhere;
// presumably registers it with the benchmark framework -- confirm).
BENCHMARK_GEMM(f32_gemm_minmax_ukernel_4x2c4__fma3)

// Benchmark wrapper: forwards `state` to GEMMBenchmark together with
// xnn_f32_gemm_minmax_ukernel_6x2c4__fma3, the scalar min/max params
// initializer and the GOI weight-packing function, using tile arguments
// mr=6, nr=2, kr=4, sr=1. benchmark::utils::CheckFMA3 is supplied in the
// final (ISA check) argument position.
// `net` is not referenced in this body.
static void f32_gemm_minmax_ukernel_6x2c4__fma3(benchmark::State& state,
                                                const char* net) {
  GEMMBenchmark(
      state,
      xnn_f32_gemm_minmax_ukernel_6x2c4__fma3,
      xnn_init_f32_minmax_scalar_params,
      xnn_pack_f32_gemm_goi_w,
      /*mr=*/6,
      /*nr=*/2,
      /*kr=*/4,
      /*sr=*/1,
      /*isa_check=*/benchmark::utils::CheckFMA3);
}

// Hands the wrapper above to the BENCHMARK_GEMM macro (defined elsewhere;
// presumably registers it with the benchmark framework -- confirm).
BENCHMARK_GEMM(f32_gemm_minmax_ukernel_6x2c4__fma3)

static void f32_gemm_minmax_ukernel_1x8__fma3_broadcast(benchmark::State& state, const char* net) {
GEMMBenchmark(state,
xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast,
Expand Down
12 changes: 6 additions & 6 deletions cmake/gen/avx512f_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,9 @@ SET(PROD_AVX512F_MICROKERNEL_SRCS
src/f32-dwconv/gen/f32-dwconv-5f5m5l32c16s1r-minmax-avx512f.c
src/f32-dwconv/gen/f32-dwconv-9p16c-minmax-avx512f.c
src/f32-dwconv/gen/f32-dwconv-25p16c-minmax-avx512f.c
src/f32-gemm/gen/f32-gemm-1x16-minmax-avx512f-broadcast.c
src/f32-gemm/gen/f32-gemm-1x32-minmax-avx512f-broadcast.c
src/f32-gemm/gen/f32-gemm-7x16-minmax-avx512f-broadcast.c
src/f32-gemm/gen/f32-gemm-7x32-minmax-avx512f-broadcast.c
src/f32-igemm/gen/f32-igemm-1x16-minmax-avx512f-broadcast.c
src/f32-igemm/gen/f32-igemm-1x32-minmax-avx512f-broadcast.c
src/f32-igemm/gen/f32-igemm-7x16-minmax-avx512f-broadcast.c
src/f32-igemm/gen/f32-igemm-7x32-minmax-avx512f-broadcast.c
src/f32-raddstoreexpminusmax/gen/f32-raddstoreexpminusmax-avx512f-rr2-p5-u64-acc2.c
src/f32-rdsum/gen/f32-rdsum-7p7x-minmax-avx512f-c64.c
Expand Down Expand Up @@ -68,8 +64,6 @@ SET(PROD_AVX512F_MICROKERNEL_SRCS
src/f32-vunary/gen/f32-vabs-avx512f.c
src/f32-vunary/gen/f32-vneg-avx512f.c
src/f32-vunary/gen/f32-vsqr-avx512f.c
src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u8.c
src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4-prfm.c
src/x32-packw/gen/x32-packw-x32-gemm-gio-avx512f-u8.c
src/x32-packw/gen/x32-packw-x32-gemm-goi-avx512f-u4-prfm.c)

Expand All @@ -89,6 +83,7 @@ SET(NON_PROD_AVX512F_MICROKERNEL_SRCS
src/f32-dwconv/gen/f32-dwconv-25p16c-minmax-avx512f-acc2.c
src/f32-dwconv/gen/f32-dwconv-25p32c-minmax-avx512f-acc2.c
src/f32-dwconv/gen/f32-dwconv-25p32c-minmax-avx512f.c
src/f32-gemm/gen/f32-gemm-1x16-minmax-avx512f-broadcast.c
src/f32-gemm/gen/f32-gemm-1x64-minmax-avx512f-broadcast.c
src/f32-gemm/gen/f32-gemm-4x16-minmax-avx512f-broadcast.c
src/f32-gemm/gen/f32-gemm-4x32-minmax-avx512f-broadcast.c
Expand All @@ -99,6 +94,7 @@ SET(NON_PROD_AVX512F_MICROKERNEL_SRCS
src/f32-gemm/gen/f32-gemm-6x16-minmax-avx512f-broadcast.c
src/f32-gemm/gen/f32-gemm-6x32-minmax-avx512f-broadcast.c
src/f32-gemm/gen/f32-gemm-6x64-minmax-avx512f-broadcast.c
src/f32-gemm/gen/f32-gemm-7x16-minmax-avx512f-broadcast.c
src/f32-gemm/gen/f32-gemm-7x64-minmax-avx512f-broadcast.c
src/f32-gemm/gen/f32-gemm-8x16-minmax-avx512f-broadcast.c
src/f32-gemm/gen/f32-gemm-8x32-minmax-avx512f-broadcast.c
Expand Down Expand Up @@ -133,12 +129,14 @@ SET(NON_PROD_AVX512F_MICROKERNEL_SRCS
src/f32-gemminc/gen/f32-gemminc-6x16-minmax-avx512f-broadcast.c
src/f32-gemminc/gen/f32-gemminc-7x16-minmax-avx512f-broadcast.c
src/f32-gemminc/gen/f32-gemminc-8x16-minmax-avx512f-broadcast.c
src/f32-igemm/gen/f32-igemm-1x16-minmax-avx512f-broadcast.c
src/f32-igemm/gen/f32-igemm-4x16-minmax-avx512f-broadcast.c
src/f32-igemm/gen/f32-igemm-4x32-minmax-avx512f-broadcast.c
src/f32-igemm/gen/f32-igemm-5x16-minmax-avx512f-broadcast.c
src/f32-igemm/gen/f32-igemm-5x32-minmax-avx512f-broadcast.c
src/f32-igemm/gen/f32-igemm-6x16-minmax-avx512f-broadcast.c
src/f32-igemm/gen/f32-igemm-6x32-minmax-avx512f-broadcast.c
src/f32-igemm/gen/f32-igemm-7x16-minmax-avx512f-broadcast.c
src/f32-igemm/gen/f32-igemm-8x16-minmax-avx512f-broadcast.c
src/f32-igemm/gen/f32-igemm-8x32-minmax-avx512f-broadcast.c
src/f32-raddexpminusmax/gen/f32-raddexpminusmax-avx512f-p5-scalef-u64-acc2.c
Expand Down Expand Up @@ -277,6 +275,8 @@ SET(NON_PROD_AVX512F_MICROKERNEL_SRCS
src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u1-prfm.c
src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u1.c
src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u8-prfm.c
src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u8.c
src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4-prfm.c
src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4.c
src/x32-packw/gen/x32-packw-x32-gemm-gio-avx512f-u1-prfm.c
src/x32-packw/gen/x32-packw-x32-gemm-gio-avx512f-u1.c
Expand Down
12 changes: 6 additions & 6 deletions cmake/gen/avx_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,9 @@ SET(PROD_AVX_MICROKERNEL_SRCS
src/f32-dwconv/gen/f32-dwconv-9p16c-minmax-avx.c
src/f32-dwconv/gen/f32-dwconv-25p8c-minmax-avx.c
src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u24.c
src/f32-gemm/gen/f32-gemm-1x8-minmax-avx-broadcast.c
src/f32-gemm/gen/f32-gemm-1x16-minmax-avx-broadcast.c
src/f32-gemm/gen/f32-gemm-5x8-minmax-avx-broadcast.c
src/f32-gemm/gen/f32-gemm-5x16-minmax-avx-broadcast.c
src/f32-igemm/gen/f32-igemm-1x8-minmax-avx-broadcast.c
src/f32-igemm/gen/f32-igemm-1x16-minmax-avx-broadcast.c
src/f32-igemm/gen/f32-igemm-5x8-minmax-avx-broadcast.c
src/f32-igemm/gen/f32-igemm-5x16-minmax-avx-broadcast.c
src/f32-qc4w-gemm/gen/f32-qc4w-gemm-1x16-minmax-avx-broadcast.c
src/f32-qc4w-gemm/gen/f32-qc4w-gemm-3x16-minmax-avx-broadcast.c
Expand Down Expand Up @@ -105,8 +101,6 @@ SET(PROD_AVX_MICROKERNEL_SRCS
src/qu8-vmul/gen/qu8-vmul-minmax-fp32-avx-mul16-ld64-u16.c
src/qu8-vmulc/gen/qu8-vmulc-minmax-fp32-avx-mul16-ld64-u16.c
src/x8-lut/gen/x8-lut-avx-u64.c
src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u8.c
src/x32-packw/gen/x32-packw-x8-gemm-goi-avx-u4.c
src/x32-packw/gen/x32-packw-x16-gemm-gio-avx-u8.c
src/x32-packw/gen/x32-packw-x16-gemm-goi-avx-u4.c
src/x32-packw/gen/x32-packw-x16s4-gemm-goi-avx-u4.c
Expand Down Expand Up @@ -147,9 +141,11 @@ SET(NON_PROD_AVX_MICROKERNEL_SRCS
src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u8.c
src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u16.c
src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u32.c
src/f32-gemm/gen/f32-gemm-1x8-minmax-avx-broadcast.c
src/f32-gemm/gen/f32-gemm-3x16-minmax-avx-broadcast.c
src/f32-gemm/gen/f32-gemm-4x8-minmax-avx-broadcast.c
src/f32-gemm/gen/f32-gemm-4x16-minmax-avx-broadcast.c
src/f32-gemm/gen/f32-gemm-5x8-minmax-avx-broadcast.c
src/f32-gemm/gen/f32-gemm-6x8-minmax-avx-broadcast.c
src/f32-gemm/gen/f32-gemm-6x16-minmax-avx-broadcast.c
src/f32-gemm/gen/f32-gemm-7x8-minmax-avx-broadcast.c
Expand All @@ -163,9 +159,11 @@ SET(NON_PROD_AVX_MICROKERNEL_SRCS
src/f32-gemminc/gen/f32-gemminc-6x8-minmax-avx-broadcast.c
src/f32-gemminc/gen/f32-gemminc-6x16-minmax-avx-broadcast.c
src/f32-gemminc/gen/f32-gemminc-7x8-minmax-avx-broadcast.c
src/f32-igemm/gen/f32-igemm-1x8-minmax-avx-broadcast.c
src/f32-igemm/gen/f32-igemm-3x16-minmax-avx-broadcast.c
src/f32-igemm/gen/f32-igemm-4x8-minmax-avx-broadcast.c
src/f32-igemm/gen/f32-igemm-4x16-minmax-avx-broadcast.c
src/f32-igemm/gen/f32-igemm-5x8-minmax-avx-broadcast.c
src/f32-igemm/gen/f32-igemm-6x8-minmax-avx-broadcast.c
src/f32-igemm/gen/f32-igemm-6x16-minmax-avx-broadcast.c
src/f32-igemm/gen/f32-igemm-7x8-minmax-avx-broadcast.c
Expand Down Expand Up @@ -459,7 +457,9 @@ SET(NON_PROD_AVX_MICROKERNEL_SRCS
src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u1-prfm.c
src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u1.c
src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u8-prfm.c
src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u8.c
src/x32-packw/gen/x32-packw-x8-gemm-goi-avx-u4-prfm.c
src/x32-packw/gen/x32-packw-x8-gemm-goi-avx-u4.c
src/x32-packw/gen/x32-packw-x8s4-gemm-goi-avx-u4-prfm.c
src/x32-packw/gen/x32-packw-x8s4-gemm-goi-avx-u4.c
src/x32-packw/gen/x32-packw-x16-gemm-gio-avx-u1-prfm.c
Expand Down
18 changes: 12 additions & 6 deletions cmake/gen/fma3_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,15 @@ SET(PROD_FMA3_MICROKERNEL_SRCS
src/f32-dwconv/gen/f32-dwconv-5f5m5l8c8s4r-minmax-fma3.c
src/f32-dwconv/gen/f32-dwconv-9p16c-minmax-fma3.c
src/f32-dwconv/gen/f32-dwconv-25p8c-minmax-fma3.c
src/f32-gemm/gen/f32-gemm-1x8-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-1x16-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-1x16s4-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-4x8-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-4x2c4-minmax-fma3.c
src/f32-gemm/gen/f32-gemm-4x16s4-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-5x8-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-5x16-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-1x8-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-1x16-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-1x16s4-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-4x8-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-4x2c4-minmax-fma3.c
src/f32-igemm/gen/f32-igemm-4x16s4-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-5x8-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-5x16-minmax-fma3-broadcast-prfm.c
src/f32-qc4w-gemm/gen/f32-qc4w-gemm-1x16-minmax-fma3-broadcast.c
src/f32-qc4w-gemm/gen/f32-qc4w-gemm-3x16-minmax-fma3-broadcast.c
Expand Down Expand Up @@ -123,10 +119,14 @@ SET(NON_PROD_FMA3_MICROKERNEL_SRCS
src/f32-dwconv/gen/f32-dwconv-25p8c-minmax-fma3-acc2.c
src/f32-dwconv/gen/f32-dwconv-25p16c-minmax-fma3-acc2.c
src/f32-dwconv/gen/f32-dwconv-25p16c-minmax-fma3.c
src/f32-gemm/gen/f32-gemm-1x8-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-3x16-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-3x16s4-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-4x8-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-4x16-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-5x8-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-5x16s4-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-6x2c4-minmax-fma3.c
src/f32-gemm/gen/f32-gemm-6x8-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-6x16-minmax-fma3-broadcast.c
src/f32-gemm/gen/f32-gemm-6x16s4-minmax-fma3-broadcast.c
Expand All @@ -148,11 +148,15 @@ SET(NON_PROD_FMA3_MICROKERNEL_SRCS
src/f32-gemminc/gen/f32-gemminc-6x16s4-minmax-fma3-broadcast.c
src/f32-gemminc/gen/f32-gemminc-7x8-minmax-fma3-broadcast.c
src/f32-gemminc/gen/f32-gemminc-8x8-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-1x8-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-3x16-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-3x16s4-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-4x8-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-4x16-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-5x8-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-5x16-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-5x16s4-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-6x2c4-minmax-fma3.c
src/f32-igemm/gen/f32-igemm-6x8-minmax-fma3-broadcast.c
src/f32-igemm/gen/f32-igemm-6x16-minmax-fma3-broadcast-prfm.c
src/f32-igemm/gen/f32-igemm-6x16-minmax-fma3-broadcast.c
Expand All @@ -167,7 +171,9 @@ SET(NON_PROD_FMA3_MICROKERNEL_SRCS
src/f32-qc4w-gemm/gen/f32-qc4w-gemm-8x16-minmax-fma3-broadcast.c
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-2x16-minmax-fma3-broadcast.c
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-3x16-minmax-fma3-broadcast.c
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-4x2c4-minmax-fma3.c
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-4x16-minmax-fma3-broadcast.c
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-6x2c4-minmax-fma3.c
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-6x16-minmax-fma3-broadcast.c
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-7x16-minmax-fma3-broadcast.c
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-8x16-minmax-fma3-broadcast.c
Expand Down
12 changes: 6 additions & 6 deletions gen/avx512f_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,9 @@ PROD_AVX512F_MICROKERNEL_SRCS = [
"src/f32-dwconv/gen/f32-dwconv-5f5m5l32c16s1r-minmax-avx512f.c",
"src/f32-dwconv/gen/f32-dwconv-9p16c-minmax-avx512f.c",
"src/f32-dwconv/gen/f32-dwconv-25p16c-minmax-avx512f.c",
"src/f32-gemm/gen/f32-gemm-1x16-minmax-avx512f-broadcast.c",
"src/f32-gemm/gen/f32-gemm-1x32-minmax-avx512f-broadcast.c",
"src/f32-gemm/gen/f32-gemm-7x16-minmax-avx512f-broadcast.c",
"src/f32-gemm/gen/f32-gemm-7x32-minmax-avx512f-broadcast.c",
"src/f32-igemm/gen/f32-igemm-1x16-minmax-avx512f-broadcast.c",
"src/f32-igemm/gen/f32-igemm-1x32-minmax-avx512f-broadcast.c",
"src/f32-igemm/gen/f32-igemm-7x16-minmax-avx512f-broadcast.c",
"src/f32-igemm/gen/f32-igemm-7x32-minmax-avx512f-broadcast.c",
"src/f32-raddstoreexpminusmax/gen/f32-raddstoreexpminusmax-avx512f-rr2-p5-u64-acc2.c",
"src/f32-rdsum/gen/f32-rdsum-7p7x-minmax-avx512f-c64.c",
Expand Down Expand Up @@ -64,8 +60,6 @@ PROD_AVX512F_MICROKERNEL_SRCS = [
"src/f32-vunary/gen/f32-vabs-avx512f.c",
"src/f32-vunary/gen/f32-vneg-avx512f.c",
"src/f32-vunary/gen/f32-vsqr-avx512f.c",
"src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u8.c",
"src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4-prfm.c",
"src/x32-packw/gen/x32-packw-x32-gemm-gio-avx512f-u8.c",
"src/x32-packw/gen/x32-packw-x32-gemm-goi-avx512f-u4-prfm.c",
]
Expand All @@ -86,6 +80,7 @@ NON_PROD_AVX512F_MICROKERNEL_SRCS = [
"src/f32-dwconv/gen/f32-dwconv-25p16c-minmax-avx512f-acc2.c",
"src/f32-dwconv/gen/f32-dwconv-25p32c-minmax-avx512f-acc2.c",
"src/f32-dwconv/gen/f32-dwconv-25p32c-minmax-avx512f.c",
"src/f32-gemm/gen/f32-gemm-1x16-minmax-avx512f-broadcast.c",
"src/f32-gemm/gen/f32-gemm-1x64-minmax-avx512f-broadcast.c",
"src/f32-gemm/gen/f32-gemm-4x16-minmax-avx512f-broadcast.c",
"src/f32-gemm/gen/f32-gemm-4x32-minmax-avx512f-broadcast.c",
Expand All @@ -96,6 +91,7 @@ NON_PROD_AVX512F_MICROKERNEL_SRCS = [
"src/f32-gemm/gen/f32-gemm-6x16-minmax-avx512f-broadcast.c",
"src/f32-gemm/gen/f32-gemm-6x32-minmax-avx512f-broadcast.c",
"src/f32-gemm/gen/f32-gemm-6x64-minmax-avx512f-broadcast.c",
"src/f32-gemm/gen/f32-gemm-7x16-minmax-avx512f-broadcast.c",
"src/f32-gemm/gen/f32-gemm-7x64-minmax-avx512f-broadcast.c",
"src/f32-gemm/gen/f32-gemm-8x16-minmax-avx512f-broadcast.c",
"src/f32-gemm/gen/f32-gemm-8x32-minmax-avx512f-broadcast.c",
Expand Down Expand Up @@ -130,12 +126,14 @@ NON_PROD_AVX512F_MICROKERNEL_SRCS = [
"src/f32-gemminc/gen/f32-gemminc-6x16-minmax-avx512f-broadcast.c",
"src/f32-gemminc/gen/f32-gemminc-7x16-minmax-avx512f-broadcast.c",
"src/f32-gemminc/gen/f32-gemminc-8x16-minmax-avx512f-broadcast.c",
"src/f32-igemm/gen/f32-igemm-1x16-minmax-avx512f-broadcast.c",
"src/f32-igemm/gen/f32-igemm-4x16-minmax-avx512f-broadcast.c",
"src/f32-igemm/gen/f32-igemm-4x32-minmax-avx512f-broadcast.c",
"src/f32-igemm/gen/f32-igemm-5x16-minmax-avx512f-broadcast.c",
"src/f32-igemm/gen/f32-igemm-5x32-minmax-avx512f-broadcast.c",
"src/f32-igemm/gen/f32-igemm-6x16-minmax-avx512f-broadcast.c",
"src/f32-igemm/gen/f32-igemm-6x32-minmax-avx512f-broadcast.c",
"src/f32-igemm/gen/f32-igemm-7x16-minmax-avx512f-broadcast.c",
"src/f32-igemm/gen/f32-igemm-8x16-minmax-avx512f-broadcast.c",
"src/f32-igemm/gen/f32-igemm-8x32-minmax-avx512f-broadcast.c",
"src/f32-raddexpminusmax/gen/f32-raddexpminusmax-avx512f-p5-scalef-u64-acc2.c",
Expand Down Expand Up @@ -274,6 +272,8 @@ NON_PROD_AVX512F_MICROKERNEL_SRCS = [
"src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u1-prfm.c",
"src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u1.c",
"src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u8-prfm.c",
"src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u8.c",
"src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4-prfm.c",
"src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4.c",
"src/x32-packw/gen/x32-packw-x32-gemm-gio-avx512f-u1-prfm.c",
"src/x32-packw/gen/x32-packw-x32-gemm-gio-avx512f-u1.c",
Expand Down
12 changes: 6 additions & 6 deletions gen/avx_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,9 @@ PROD_AVX_MICROKERNEL_SRCS = [
"src/f32-dwconv/gen/f32-dwconv-9p16c-minmax-avx.c",
"src/f32-dwconv/gen/f32-dwconv-25p8c-minmax-avx.c",
"src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u24.c",
"src/f32-gemm/gen/f32-gemm-1x8-minmax-avx-broadcast.c",
"src/f32-gemm/gen/f32-gemm-1x16-minmax-avx-broadcast.c",
"src/f32-gemm/gen/f32-gemm-5x8-minmax-avx-broadcast.c",
"src/f32-gemm/gen/f32-gemm-5x16-minmax-avx-broadcast.c",
"src/f32-igemm/gen/f32-igemm-1x8-minmax-avx-broadcast.c",
"src/f32-igemm/gen/f32-igemm-1x16-minmax-avx-broadcast.c",
"src/f32-igemm/gen/f32-igemm-5x8-minmax-avx-broadcast.c",
"src/f32-igemm/gen/f32-igemm-5x16-minmax-avx-broadcast.c",
"src/f32-qc4w-gemm/gen/f32-qc4w-gemm-1x16-minmax-avx-broadcast.c",
"src/f32-qc4w-gemm/gen/f32-qc4w-gemm-3x16-minmax-avx-broadcast.c",
Expand Down Expand Up @@ -101,8 +97,6 @@ PROD_AVX_MICROKERNEL_SRCS = [
"src/qu8-vmul/gen/qu8-vmul-minmax-fp32-avx-mul16-ld64-u16.c",
"src/qu8-vmulc/gen/qu8-vmulc-minmax-fp32-avx-mul16-ld64-u16.c",
"src/x8-lut/gen/x8-lut-avx-u64.c",
"src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u8.c",
"src/x32-packw/gen/x32-packw-x8-gemm-goi-avx-u4.c",
"src/x32-packw/gen/x32-packw-x16-gemm-gio-avx-u8.c",
"src/x32-packw/gen/x32-packw-x16-gemm-goi-avx-u4.c",
"src/x32-packw/gen/x32-packw-x16s4-gemm-goi-avx-u4.c",
Expand Down Expand Up @@ -144,9 +138,11 @@ NON_PROD_AVX_MICROKERNEL_SRCS = [
"src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u8.c",
"src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u16.c",
"src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u32.c",
"src/f32-gemm/gen/f32-gemm-1x8-minmax-avx-broadcast.c",
"src/f32-gemm/gen/f32-gemm-3x16-minmax-avx-broadcast.c",
"src/f32-gemm/gen/f32-gemm-4x8-minmax-avx-broadcast.c",
"src/f32-gemm/gen/f32-gemm-4x16-minmax-avx-broadcast.c",
"src/f32-gemm/gen/f32-gemm-5x8-minmax-avx-broadcast.c",
"src/f32-gemm/gen/f32-gemm-6x8-minmax-avx-broadcast.c",
"src/f32-gemm/gen/f32-gemm-6x16-minmax-avx-broadcast.c",
"src/f32-gemm/gen/f32-gemm-7x8-minmax-avx-broadcast.c",
Expand All @@ -160,9 +156,11 @@ NON_PROD_AVX_MICROKERNEL_SRCS = [
"src/f32-gemminc/gen/f32-gemminc-6x8-minmax-avx-broadcast.c",
"src/f32-gemminc/gen/f32-gemminc-6x16-minmax-avx-broadcast.c",
"src/f32-gemminc/gen/f32-gemminc-7x8-minmax-avx-broadcast.c",
"src/f32-igemm/gen/f32-igemm-1x8-minmax-avx-broadcast.c",
"src/f32-igemm/gen/f32-igemm-3x16-minmax-avx-broadcast.c",
"src/f32-igemm/gen/f32-igemm-4x8-minmax-avx-broadcast.c",
"src/f32-igemm/gen/f32-igemm-4x16-minmax-avx-broadcast.c",
"src/f32-igemm/gen/f32-igemm-5x8-minmax-avx-broadcast.c",
"src/f32-igemm/gen/f32-igemm-6x8-minmax-avx-broadcast.c",
"src/f32-igemm/gen/f32-igemm-6x16-minmax-avx-broadcast.c",
"src/f32-igemm/gen/f32-igemm-7x8-minmax-avx-broadcast.c",
Expand Down Expand Up @@ -456,7 +454,9 @@ NON_PROD_AVX_MICROKERNEL_SRCS = [
"src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u1-prfm.c",
"src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u1.c",
"src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u8-prfm.c",
"src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u8.c",
"src/x32-packw/gen/x32-packw-x8-gemm-goi-avx-u4-prfm.c",
"src/x32-packw/gen/x32-packw-x8-gemm-goi-avx-u4.c",
"src/x32-packw/gen/x32-packw-x8s4-gemm-goi-avx-u4-prfm.c",
"src/x32-packw/gen/x32-packw-x8s4-gemm-goi-avx-u4.c",
"src/x32-packw/gen/x32-packw-x16-gemm-gio-avx-u1-prfm.c",
Expand Down
Loading

0 comments on commit 025e3b6

Please sign in to comment.