Skip to content

Commit

Permalink
Add wrappers for the KleidiAI qp8-qc4w GEMM microkernels.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 638179017
  • Loading branch information
gonnet authored and xnnpack-bot committed Jun 11, 2024
1 parent 56fa083 commit 90e114d
Show file tree
Hide file tree
Showing 141 changed files with 3,640 additions and 583 deletions.
61 changes: 55 additions & 6 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
load("@bazel_skylib//lib:selects.bzl", "selects") # buildifier: disable=out-of-order-load
load("@rules_python//python:py_binary.bzl", "py_binary")
load(":build_defs.bzl", "xnnpack_aggregate_library", "xnnpack_cc_library", "xnnpack_gcc_std_copts", "xnnpack_min_size_copts", "xnnpack_msvc_std_copts", "xnnpack_slinky_defines", "xnnpack_slinky_deps", "xnnpack_slinky_srcs", "xnnpack_std_cxxopts", "xnnpack_transitive_source_list", "xnnpack_visibility")
load("//gen:microkernels.bzl", "AARCH32_ASM_MICROKERNEL_SRCS", "AARCH32_JIT_MICROKERNEL_SRCS", "AARCH64_ASM_MICROKERNEL_SRCS", "AARCH64_JIT_MICROKERNEL_SRCS", "ALL_ARMSIMD32_MICROKERNEL_SRCS", "ALL_AVX2_MICROKERNEL_SRCS", "ALL_AVX512AMX_MICROKERNEL_SRCS", "ALL_AVX512FP16_MICROKERNEL_SRCS", "ALL_AVX512F_MICROKERNEL_SRCS", "ALL_AVX512SKX_MICROKERNEL_SRCS", "ALL_AVX512VBMI_MICROKERNEL_SRCS", "ALL_AVX512VNNIGFNI_MICROKERNEL_SRCS", "ALL_AVX512VNNI_MICROKERNEL_SRCS", "ALL_AVXVNNI_MICROKERNEL_SRCS", "ALL_AVX_MICROKERNEL_SRCS", "ALL_F16C_MICROKERNEL_SRCS", "ALL_FMA3_MICROKERNEL_SRCS", "ALL_FMA_MICROKERNEL_SRCS", "ALL_FP16ARITH_MICROKERNEL_SRCS", "ALL_HEXAGON_MICROKERNEL_SRCS", "ALL_HVX_MICROKERNEL_SRCS", "ALL_NEONBF16_AARCH64_MICROKERNEL_SRCS", "ALL_NEONBF16_MICROKERNEL_SRCS", "ALL_NEONDOTFP16ARITH_MICROKERNEL_SRCS", "ALL_NEONDOT_AARCH64_MICROKERNEL_SRCS", "ALL_NEONDOT_MICROKERNEL_SRCS", "ALL_NEONFMA_AARCH64_MICROKERNEL_SRCS", "ALL_NEONFMA_MICROKERNEL_SRCS", "ALL_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS", "ALL_NEONFP16ARITH_MICROKERNEL_SRCS", "ALL_NEONFP16_MICROKERNEL_SRCS", "ALL_NEONI8MM_MICROKERNEL_SRCS", "ALL_NEONV8_MICROKERNEL_SRCS", "ALL_NEON_AARCH64_MICROKERNEL_SRCS", "ALL_NEON_MICROKERNEL_SRCS", "ALL_RVVFP16ARITH_MICROKERNEL_SRCS", "ALL_RVV_MICROKERNEL_SRCS", "ALL_SCALAR_MICROKERNEL_SRCS", "ALL_SSE2_MICROKERNEL_SRCS", "ALL_SSE41_MICROKERNEL_SRCS", "ALL_SSE_MICROKERNEL_SRCS", "ALL_SSSE3_MICROKERNEL_SRCS", "ALL_WASMRELAXEDSIMD_MICROKERNEL_SRCS", "ALL_WASMSIMD_MICROKERNEL_SRCS", "ALL_WASM_MICROKERNEL_SRCS", "WASM32_ASM_MICROKERNEL_SRCS", "WASM32_JIT_MICROKERNEL_SRCS", "WASMRELAXEDSIMD32_JIT_MICROKERNEL_SRCS", "WASMSIMD32_JIT_MICROKERNEL_SRCS")
load(":build_defs.bzl", "xnnpack_aggregate_library", "xnnpack_cc_library", "xnnpack_gcc_std_copts", "xnnpack_if_kleidiai_enabled", "xnnpack_kleidiai_defines", "xnnpack_min_size_copts", "xnnpack_msvc_std_copts", "xnnpack_slinky_defines", "xnnpack_slinky_deps", "xnnpack_slinky_srcs", "xnnpack_std_cxxopts", "xnnpack_transitive_source_list", "xnnpack_visibility")
load("//gen:microkernels.bzl", "AARCH32_ASM_MICROKERNEL_SRCS", "AARCH32_JIT_MICROKERNEL_SRCS", "AARCH64_ASM_MICROKERNEL_SRCS", "AARCH64_JIT_MICROKERNEL_SRCS", "ALL_ARMSIMD32_MICROKERNEL_SRCS", "ALL_AVX2_MICROKERNEL_SRCS", "ALL_AVX512AMX_MICROKERNEL_SRCS", "ALL_AVX512FP16_MICROKERNEL_SRCS", "ALL_AVX512F_MICROKERNEL_SRCS", "ALL_AVX512SKX_MICROKERNEL_SRCS", "ALL_AVX512VBMI_MICROKERNEL_SRCS", "ALL_AVX512VNNIGFNI_MICROKERNEL_SRCS", "ALL_AVX512VNNI_MICROKERNEL_SRCS", "ALL_AVXVNNI_MICROKERNEL_SRCS", "ALL_AVX_MICROKERNEL_SRCS", "ALL_F16C_MICROKERNEL_SRCS", "ALL_FMA3_MICROKERNEL_SRCS", "ALL_FMA_MICROKERNEL_SRCS", "ALL_FP16ARITH_MICROKERNEL_SRCS", "ALL_HEXAGON_MICROKERNEL_SRCS", "ALL_HVX_MICROKERNEL_SRCS", "ALL_NEONBF16_AARCH64_MICROKERNEL_SRCS", "ALL_NEONBF16_MICROKERNEL_SRCS", "ALL_NEONDOTFP16ARITH_MICROKERNEL_SRCS", "ALL_NEONDOT_AARCH64_MICROKERNEL_SRCS", "ALL_NEONDOT_MICROKERNEL_SRCS", "ALL_NEONFMA_AARCH64_MICROKERNEL_SRCS", "ALL_NEONFMA_MICROKERNEL_SRCS", "ALL_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS", "ALL_NEONFP16ARITH_MICROKERNEL_SRCS", "ALL_NEONFP16_MICROKERNEL_SRCS", "ALL_NEONI8MM_AARCH64_MICROKERNEL_SRCS", "ALL_NEONI8MM_MICROKERNEL_SRCS", "ALL_NEONV8_MICROKERNEL_SRCS", "ALL_NEON_AARCH64_MICROKERNEL_SRCS", "ALL_NEON_MICROKERNEL_SRCS", "ALL_RVVFP16ARITH_MICROKERNEL_SRCS", "ALL_RVV_MICROKERNEL_SRCS", "ALL_SCALAR_MICROKERNEL_SRCS", "ALL_SSE2_MICROKERNEL_SRCS", "ALL_SSE41_MICROKERNEL_SRCS", "ALL_SSE_MICROKERNEL_SRCS", "ALL_SSSE3_MICROKERNEL_SRCS", "ALL_WASMRELAXEDSIMD_MICROKERNEL_SRCS", "ALL_WASMSIMD_MICROKERNEL_SRCS", "ALL_WASM_MICROKERNEL_SRCS", "WASM32_ASM_MICROKERNEL_SRCS", "WASM32_JIT_MICROKERNEL_SRCS", "WASMRELAXEDSIMD32_JIT_MICROKERNEL_SRCS", "WASMSIMD32_JIT_MICROKERNEL_SRCS")

licenses(["notice"])

Expand Down Expand Up @@ -135,6 +135,7 @@ MICROKERNEL_HDRS = [
"src/xnnpack/lut.h",
"src/xnnpack/maxpool.h",
"src/xnnpack/packb.h",
"src/xnnpack/packq.h",
"src/xnnpack/packw.h",
"src/xnnpack/packx.h",
"src/xnnpack/pad.h",
Expand Down Expand Up @@ -725,9 +726,10 @@ xnnpack_cc_library(
"src/amalgam/gen/neon-aarch64.c",
"src/amalgam/gen/neon.c",
],
defines = xnnpack_kleidiai_defines(),
gcc_copts = xnnpack_gcc_std_copts(),
msvc_copts = xnnpack_msvc_std_copts(),
deps = MICROKERNEL_DEPS,
deps = MICROKERNEL_DEPS + xnnpack_if_kleidiai_enabled(["@KleidiAI//:kleidiai_neon"]),
)

xnnpack_cc_library(
Expand All @@ -739,9 +741,12 @@ xnnpack_cc_library(
],
aarch32_srcs = ALL_NEON_MICROKERNEL_SRCS,
aarch64_srcs = ALL_NEON_MICROKERNEL_SRCS + ALL_NEON_AARCH64_MICROKERNEL_SRCS,
defines = xnnpack_kleidiai_defines(),
gcc_copts = xnnpack_gcc_std_copts(),
msvc_copts = xnnpack_msvc_std_copts(),
deps = MICROKERNEL_DEPS,
deps = MICROKERNEL_DEPS + xnnpack_if_kleidiai_enabled([
"@KleidiAI//:kleidiai_neon",
]),
)

xnnpack_cc_library(
Expand Down Expand Up @@ -901,6 +906,9 @@ xnnpack_cc_library(
],
aarch32_srcs = ALL_NEONDOTFP16ARITH_MICROKERNEL_SRCS,
aarch64_copts = ["-march=armv8.2-a+dotprod+fp16"],
aarch64_deps = [
"@KleidiAI//:kleidiai_dotprod",
],
aarch64_srcs = ALL_NEONDOTFP16ARITH_MICROKERNEL_SRCS,
gcc_copts = xnnpack_gcc_std_copts(),
msvc_copts = xnnpack_msvc_std_copts(),
Expand All @@ -916,6 +924,9 @@ xnnpack_cc_library(
],
aarch32_srcs = ["src/amalgam/gen/neondot.c"],
aarch64_copts = ["-march=armv8.2-a+dotprod"],
aarch64_deps = [
"@KleidiAI//:kleidiai_dotprod",
],
aarch64_srcs = [
"src/amalgam/gen/neondot-aarch64.c",
"src/amalgam/gen/neondot.c",
Expand All @@ -934,6 +945,9 @@ xnnpack_cc_library(
],
aarch32_srcs = ALL_NEONDOT_MICROKERNEL_SRCS,
aarch64_copts = ["-march=armv8.2-a+dotprod"],
aarch64_deps = [
"@KleidiAI//:kleidiai_dotprod",
],
aarch64_srcs = ALL_NEONDOT_MICROKERNEL_SRCS + ALL_NEONDOT_AARCH64_MICROKERNEL_SRCS,
gcc_copts = xnnpack_gcc_std_copts(),
msvc_copts = xnnpack_msvc_std_copts(),
Expand All @@ -949,7 +963,13 @@ xnnpack_cc_library(
],
aarch32_srcs = ["src/amalgam/gen/neoni8mm.c"],
aarch64_copts = ["-march=armv8.2-a+i8mm+fp16"],
aarch64_srcs = ["src/amalgam/gen/neoni8mm.c"],
aarch64_deps = [
"@KleidiAI//:kleidiai_i8mm",
],
aarch64_srcs = [
"src/amalgam/gen/neoni8mm-aarch64.c",
"src/amalgam/gen/neoni8mm.c",
],
gcc_copts = xnnpack_gcc_std_copts(),
msvc_copts = xnnpack_msvc_std_copts(),
deps = MICROKERNEL_DEPS,
Expand All @@ -964,7 +984,10 @@ xnnpack_cc_library(
],
aarch32_srcs = ALL_NEONI8MM_MICROKERNEL_SRCS,
aarch64_copts = ["-march=armv8.2-a+i8mm+fp16"],
aarch64_srcs = ALL_NEONI8MM_MICROKERNEL_SRCS,
aarch64_deps = [
"@KleidiAI//:kleidiai_i8mm",
],
aarch64_srcs = ALL_NEONI8MM_MICROKERNEL_SRCS + ALL_NEONI8MM_AARCH64_MICROKERNEL_SRCS,
gcc_copts = xnnpack_gcc_std_copts(),
msvc_copts = xnnpack_msvc_std_copts(),
deps = MICROKERNEL_DEPS,
Expand Down Expand Up @@ -2564,6 +2587,18 @@ config_setting(
define_values = {"xnn_enable_cpuinfo": "false"},
)

# Enables usage of the KleidiAI library.
# Matches only when the build defines `xnn_enable_kleidiai` as "true"
# (e.g. `--define xnn_enable_kleidiai=true`).
config_setting(
name = "xnn_enable_kleidiai_explicit_true",
define_values = {"xnn_enable_kleidiai": "true"},
)

# Disables usage of the KleidiAI library.
# Matches only when the build defines `xnn_enable_kleidiai` as "false"
# (e.g. `--define xnn_enable_kleidiai=false`).
config_setting(
name = "xnn_enable_kleidiai_explicit_false",
define_values = {"xnn_enable_kleidiai": "false"},
)

# Enables usage of assembly kernels.
config_setting(
name = "xnn_enable_assembly_explicit_true",
Expand Down Expand Up @@ -2946,6 +2981,20 @@ alias(
}),
)

# Default policy for KleidiAI when `xnn_enable_kleidiai` is neither "true" nor
# "false". The only member is the explicit-true setting, so this group does not
# match unless KleidiAI was explicitly requested, i.e. the default is "off".
selects.config_setting_group(
name = "kleidiai_enabled_by_default",
match_any = [":xnn_enable_kleidiai_explicit_true"],
)

# Single condition to select on when a target needs to know whether KleidiAI
# is enabled. The alias resolves to a config_setting as follows:
#   - explicit "true":  the explicit-true setting, which matches (enabled).
#   - explicit "false": ALSO the explicit-true setting. That setting requires
#     the define to be "true", so it cannot match here and the alias evaluates
#     as disabled.
#     NOTE(review): this looks like a typo but appears intentional; confirm
#     against the other `*_enabled` aliases in this file before "fixing" it.
#   - otherwise: whatever `:kleidiai_enabled_by_default` decides.
alias(
name = "kleidiai_enabled",
actual = select({
":xnn_enable_kleidiai_explicit_true": ":xnn_enable_kleidiai_explicit_true",
":xnn_enable_kleidiai_explicit_false": ":xnn_enable_kleidiai_explicit_true",
"//conditions:default": ":kleidiai_enabled_by_default",
}),
)

selects.config_setting_group(
name = "assembly_enabled_by_default",
match_any = [
Expand Down
83 changes: 83 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ OPTION(USE_GNU_SOURCE "Use _GNU_SOURCE macro" OFF)
IF(XNNPACK_BUILD_BENCHMARKS OR XNNPACK_BUILD_TESTS)
SET(XNNPACK_BUILD_ALL_MICROKERNELS ON)
ENDIF()
# Opt-in switch (OFF by default). Its value is exported to the sources as the
# XNN_ENABLE_KLEIDIAI compile definition and, when ON, the `kleidiai` target is
# linked into the microkernel libraries.
OPTION(XNNPACK_ENABLE_KLEIDIAI "Use KleidiAI GEMM microkernels for Arm" OFF)

# --- [ Determine target processor
IF(CMAKE_OSX_ARCHITECTURES)
Expand Down Expand Up @@ -160,6 +161,7 @@ ADD_COMPILE_DEFINITIONS("XNN_ENABLE_SPARSE=$<BOOL:${XNNPACK_ENABLE_SPARSE}>")
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_GEMM_M_SPECIALIZATION=$<BOOL:${XNNPACK_ENABLE_GEMM_M_SPECIALIZATION}>")
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_DWCONV_MULTIPASS=$<BOOL:${XNNPACK_ENABLE_DWCONV_MULTIPASS}>")
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_HVX=$<BOOL:${XNNPACK_ENABLE_HVX}>")
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_KLEIDIAI=$<BOOL:${XNNPACK_ENABLE_KLEIDIAI}>")

IF(XNNPACK_PLATFORM_JIT STREQUAL "ON" OR XNNPACK_PLATFORM_JIT STREQUAL "OFF")
ADD_COMPILE_DEFINITIONS("XNN_PLATFORM_JIT=$<BOOL:${XNNPACK_PLATFORM_JIT}>")
Expand Down Expand Up @@ -253,6 +255,16 @@ IF(NOT XNNPACK_USE_SYSTEM_LIBS)
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/googlebenchmark-download")
SET(GOOGLEBENCHMARK_SOURCE_DIR "${CMAKE_BINARY_DIR}/googlebenchmark-source" CACHE STRING "Google Benchmark source directory")
ENDIF()

IF(XNNPACK_ENABLE_KLEIDIAI AND NOT DEFINED KLEIDIAI_SOURCE_DIR)
  # Fetch KleidiAI at configure time: generate a helper project from
  # cmake/DownloadKleidiAI.cmake, then configure and build it. The helper is
  # expected to populate ${CMAKE_BINARY_DIR}/kleidiai-source.
  # Define KLEIDIAI_SOURCE_DIR up front to use an existing checkout instead.
  MESSAGE(STATUS "Downloading KleidiAI to ${CMAKE_BINARY_DIR}/kleidiai-source (define KLEIDIAI_SOURCE_DIR to avoid it)")
  CONFIGURE_FILE(cmake/DownloadKleidiAI.cmake "${CMAKE_BINARY_DIR}/kleidiai-download/CMakeLists.txt")

  EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/kleidiai-download"
    RESULT_VARIABLE KLEIDIAI_DOWNLOAD_CONFIGURE_RESULT)
  IF(NOT "${KLEIDIAI_DOWNLOAD_CONFIGURE_RESULT}" STREQUAL "0")
    MESSAGE(FATAL_ERROR "Failed to configure the KleidiAI download project: ${KLEIDIAI_DOWNLOAD_CONFIGURE_RESULT}")
  ENDIF()

  EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/kleidiai-download"
    RESULT_VARIABLE KLEIDIAI_DOWNLOAD_BUILD_RESULT)
  IF(NOT "${KLEIDIAI_DOWNLOAD_BUILD_RESULT}" STREQUAL "0")
    MESSAGE(FATAL_ERROR "Failed to download KleidiAI: ${KLEIDIAI_DOWNLOAD_BUILD_RESULT}")
  ENDIF()

  # Cache the location only after a successful download. Caching it
  # unconditionally would make later configure runs skip this block (the
  # variable would be DEFINED) and fail on a missing or partial source tree.
  SET(KLEIDIAI_SOURCE_DIR "${CMAKE_BINARY_DIR}/kleidiai-source" CACHE STRING "kleidiai source directory")
ENDIF()
ENDIF()

# ---[ XNNPACK library
Expand Down Expand Up @@ -1136,6 +1148,34 @@ IF(XNNPACK_BUILD_LIBRARY)
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
ENDIF()

# ---[ Configure KleidiAI
IF(XNNPACK_ENABLE_KLEIDIAI)
  # Provide a `kleidiai` target unless one already exists (e.g. created by an
  # enclosing project).
  IF(NOT TARGET kleidiai)
    IF(XNNPACK_USE_SYSTEM_LIBS)
      # Wrap a pre-built KleidiAI in an imported target. The library is looked
      # up with ${KLEIDIAI_SOURCE_DIR}/lib as an extra search path, and
      # ${KLEIDIAI_SOURCE_DIR} is exposed to consumers as the include root.
      ADD_LIBRARY(kleidiai SHARED IMPORTED)
      FIND_LIBRARY(KLEIDIAI_LIBRARY kleidiai PATHS "${KLEIDIAI_SOURCE_DIR}/lib")
      IF(NOT KLEIDIAI_LIBRARY)
        MESSAGE(FATAL_ERROR "Cannot find KleidiAI")
      ENDIF()
      TARGET_INCLUDE_DIRECTORIES(kleidiai INTERFACE "${KLEIDIAI_SOURCE_DIR}")
      SET_TARGET_PROPERTIES(kleidiai PROPERTIES
        IMPORTED_LOCATION "${KLEIDIAI_LIBRARY}"
        IMPORTED_IMPLIB "${KLEIDIAI_LIBRARY}")
    ELSE()
      # Build KleidiAI from source as part of this build.
      # KLEIDIAI_BUILD_TESTS is presumably KleidiAI's own test switch; confirm
      # against the KleidiAI CMakeLists.
      SET(KLEIDIAI_BUILD_TESTS OFF CACHE BOOL "")
      ADD_SUBDIRECTORY("${KLEIDIAI_SOURCE_DIR}" "${CMAKE_BINARY_DIR}/kleidiai")
    ENDIF()
  ENDIF()

  # Link KleidiAI into every library that can contain its wrappers.
  IF(XNNPACK_BUILD_ALL_MICROKERNELS)
    TARGET_LINK_LIBRARIES(microkernels-all PRIVATE kleidiai)
  ENDIF()
  TARGET_LINK_LIBRARIES(microkernels-prod PRIVATE kleidiai)
  IF(XNNPACK_BUILD_LIBRARY)
    TARGET_LINK_LIBRARIES(XNNPACK PRIVATE kleidiai)
  ENDIF()
ENDIF()

# ---[ XNNPACK unit tests
IF(XNNPACK_BUILD_TESTS)
# ---[ Build google test
Expand Down Expand Up @@ -1193,6 +1233,13 @@ IF(XNNPACK_BUILD_TESTS)
TARGET_INCLUDE_DIRECTORIES(convolution-test-helpers PRIVATE include src)
TARGET_LINK_LIBRARIES(convolution-test-helpers PRIVATE fp16)

# Static helper library with the packq microkernel tester; linked by the
# packq unit test executable.
ADD_LIBRARY(packq-microkernel-tester STATIC
  test/packq-microkernel-tester.cc)
TARGET_INCLUDE_DIRECTORIES(packq-microkernel-tester PRIVATE
  .
  include
  src
  test)
TARGET_LINK_LIBRARIES(packq-microkernel-tester PRIVATE
  XNNPACK
  fp16
  pthreadpool
  GTest::gtest)
# The `kleidiai` target only exists when KleidiAI support is enabled.
IF(XNNPACK_ENABLE_KLEIDIAI)
  TARGET_LINK_LIBRARIES(packq-microkernel-tester PRIVATE kleidiai)
ENDIF()

IF(XNNPACK_BUILD_LIBRARY)
# ---[ Build size tests
ADD_EXECUTABLE(operator-size-test test/operator-size.c)
Expand Down Expand Up @@ -2926,6 +2973,12 @@ IF(XNNPACK_BUILD_TESTS)
TARGET_LINK_LIBRARIES(qd8-f32-qc8w-igemm-minmax-test PRIVATE gemm-microkernel-tester hardware-config logging microkernels-all microparams-init)
ADD_TEST(NAME qd8-f32-qc8w-igemm-minmax-test COMMAND qd8-f32-qc8w-igemm-minmax-test)

# Unit test for the qp8-f32-qc4w GEMM minmax microkernels; the test sources are
# split across four translation units.
ADD_EXECUTABLE(qp8-f32-qc4w-gemm-minmax-test
  test/qp8-f32-qc4w-gemm-minmax.cc
  test/qp8-f32-qc4w-gemm-minmax-2.cc
  test/qp8-f32-qc4w-gemm-minmax-3.cc
  test/qp8-f32-qc4w-gemm-minmax-4.cc)
TARGET_INCLUDE_DIRECTORIES(qp8-f32-qc4w-gemm-minmax-test PRIVATE
  include
  src
  test)
TARGET_LINK_LIBRARIES(qp8-f32-qc4w-gemm-minmax-test PRIVATE
  fp16
  pthreadpool
  GTest::gtest
  GTest::gtest_main
  gemm-microkernel-tester
  hardware-config
  logging
  microkernels-all
  microparams-init)
ADD_TEST(NAME qp8-f32-qc4w-gemm-minmax-test COMMAND qp8-f32-qc4w-gemm-minmax-test)

ADD_EXECUTABLE(qs8-qc8w-dwconv-minmax-multipass-fp32-test test/qs8-qc8w-dwconv-minmax-multipass-fp32.cc)
TARGET_INCLUDE_DIRECTORIES(qs8-qc8w-dwconv-minmax-multipass-fp32-test PRIVATE include src test)
TARGET_LINK_LIBRARIES(qs8-qc8w-dwconv-minmax-multipass-fp32-test PRIVATE fp16 pthreadpool GTest::gtest GTest::gtest_main)
Expand Down Expand Up @@ -3340,6 +3393,15 @@ IF(XNNPACK_BUILD_TESTS)
TARGET_LINK_LIBRARIES(x32-packb-test PRIVATE hardware-config logging microkernels-all packing)
ADD_TEST(NAME x32-packb-test COMMAND x32-packb-test)

# Unit test for the x8 packq microkernels.
ADD_EXECUTABLE(x8-packq-test
  test/x8-packq.cc)
TARGET_INCLUDE_DIRECTORIES(x8-packq-test PRIVATE
  include
  src
  test)
TARGET_LINK_LIBRARIES(x8-packq-test PRIVATE
  pthreadpool
  GTest::gtest
  GTest::gtest_main
  packq-microkernel-tester
  hardware-config
  logging
  microkernels-all
  packing)
# The `kleidiai` target only exists when KleidiAI support is enabled.
IF(XNNPACK_ENABLE_KLEIDIAI)
  TARGET_LINK_LIBRARIES(x8-packq-test PRIVATE kleidiai)
ENDIF()
ADD_TEST(NAME x8-packq-test COMMAND x8-packq-test)

ADD_EXECUTABLE(x8-packw-test test/x8-packw.cc)
TARGET_INCLUDE_DIRECTORIES(x8-packw-test PRIVATE include src test)
TARGET_LINK_LIBRARIES(x8-packw-test PRIVATE pthreadpool GTest::gtest GTest::gtest_main)
Expand Down Expand Up @@ -3519,6 +3581,14 @@ IF(XNNPACK_BUILD_BENCHMARKS)
TARGET_LINK_LIBRARIES(bench-utils PRIVATE logging memory)
ENDIF()

# Helper libraries
# Static library with the packq benchmark driver; linked by the packq
# benchmark executable.
ADD_LIBRARY(packq-benchmark STATIC
  bench/packq-benchmark.cc)
TARGET_INCLUDE_DIRECTORIES(packq-benchmark PRIVATE
  .
  include
  src
  bench)
TARGET_LINK_LIBRARIES(packq-benchmark PRIVATE
  XNNPACK
  benchmark::benchmark
  bench-utils)
# The `kleidiai` target only exists when KleidiAI support is enabled.
IF(XNNPACK_ENABLE_KLEIDIAI)
  TARGET_LINK_LIBRARIES(packq-benchmark PRIVATE kleidiai)
ENDIF()

# ---[ Build accuracy microbenchmarks
ADD_EXECUTABLE(f16-exp-ulp-eval eval/f16-exp-ulp.cc)
TARGET_INCLUDE_DIRECTORIES(f16-exp-ulp-eval PRIVATE . src)
Expand Down Expand Up @@ -4207,6 +4277,11 @@ IF(XNNPACK_BUILD_BENCHMARKS)
TARGET_LINK_LIBRARIES(qd8-f32-qc4w-gemm-bench PRIVATE benchmark::benchmark fp16 pthreadpool packing)
TARGET_LINK_LIBRARIES(qd8-f32-qc4w-gemm-bench PRIVATE bench-utils hardware-config logging microkernels-all microparams-init)

# Microbenchmark for the qp8-f32-qc4w GEMM microkernels.
ADD_EXECUTABLE(qp8-f32-qc4w-gemm-bench
  bench/qp8-f32-qc4w-gemm.cc)
TARGET_INCLUDE_DIRECTORIES(qp8-f32-qc4w-gemm-bench PRIVATE
  .
  include
  src)
TARGET_LINK_LIBRARIES(qp8-f32-qc4w-gemm-bench PRIVATE
  benchmark::benchmark
  fp16
  pthreadpool
  packing
  bench-utils
  hardware-config
  logging
  microkernels-all
  microparams-init)

ADD_EXECUTABLE(f32-vabs-bench bench/f32-vabs.cc)
TARGET_INCLUDE_DIRECTORIES(f32-vabs-bench PRIVATE . include src)
TARGET_LINK_LIBRARIES(f32-vabs-bench PRIVATE benchmark::benchmark pthreadpool)
Expand Down Expand Up @@ -4494,6 +4569,14 @@ IF(XNNPACK_BUILD_BENCHMARKS)
TARGET_LINK_LIBRARIES(x24-transpose-bench PRIVATE benchmark::benchmark pthreadpool)
TARGET_LINK_LIBRARIES(x24-transpose-bench PRIVATE bench-utils hardware-config logging microkernels-all microparams-init)

# Microbenchmark for the x8 packq microkernels.
ADD_EXECUTABLE(x8-packq-bench
  bench/x8-packq.cc)
TARGET_INCLUDE_DIRECTORIES(x8-packq-bench PRIVATE
  .
  include
  src)
TARGET_LINK_LIBRARIES(x8-packq-bench PRIVATE
  benchmark::benchmark
  pthreadpool)
# The `kleidiai` target only exists when KleidiAI support is enabled.
IF(XNNPACK_ENABLE_KLEIDIAI)
  TARGET_LINK_LIBRARIES(x8-packq-bench PRIVATE kleidiai)
ENDIF()
TARGET_LINK_LIBRARIES(x8-packq-bench PRIVATE
  packq-benchmark
  bench-utils
  hardware-config
  logging
  microkernels-all
  packing)

ADD_EXECUTABLE(x8-packw-bench bench/x8-packw.cc)
TARGET_INCLUDE_DIRECTORIES(x8-packw-bench PRIVATE . include src)
TARGET_LINK_LIBRARIES(x8-packw-bench PRIVATE benchmark::benchmark pthreadpool)
Expand Down
22 changes: 22 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,27 @@ http_archive(
],
)

# LINT.IfChange
# Google Test framework, used by most unit-tests.
# Pinned to one commit: the hash appears in both `strip_prefix` and `urls`, so
# update them together, along with `sha256`.
http_archive(
name = "com_google_googletest",
sha256 = "5cb522f1427558c6df572d6d0e1bf0fd076428633d080e88ad5312be0b6a8859",
strip_prefix = "googletest-e23cdb78e9fef1f69a9ef917f447add5638daf2a",
urls = ["https://github.com/google/googletest/archive/e23cdb78e9fef1f69a9ef917f447add5638daf2a.zip"],
)
# LINT.ThenChange(cmake/DownloadGoogleTest.cmake)

# LINT.IfChange
# Google Benchmark library, used in micro-benchmarks.
# Pinned to one commit: the hash appears in both `strip_prefix` and `urls`, so
# update them together, along with `sha256`.
http_archive(
name = "com_google_benchmark",
sha256 = "1ba14374fddcd9623f126b1a60945e4deac4cdc4fb25a5f25e7f779e36f2db52",
strip_prefix = "benchmark-d2a8a4ee41b923876c034afb939c4fc03598e622",
urls = ["https://github.com/google/benchmark/archive/d2a8a4ee41b923876c034afb939c4fc03598e622.zip"],
)
# LINT.ThenChange(cmake/DownloadGoogleBenchmark.cmake)

# LINT.IfChange
# FP16 library, used for half-precision conversions
http_archive(
name = "FP16",
Expand All @@ -59,14 +64,17 @@ http_archive(
"https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip",
],
)
# LINT.ThenChange(cmake/DownloadFP16.cmake)

# LINT.IfChange
# FXdiv library, used for repeated integer division by the same factor
# Pinned to one commit: the hash appears in both `strip_prefix` and `urls`, so
# update them together, along with `sha256`.
http_archive(
name = "FXdiv",
sha256 = "ab7dfb08829bee33dca38405d647868fb214ac685e379ec7ef2bebcd234cd44d",
strip_prefix = "FXdiv-b408327ac2a15ec3e43352421954f5b1967701d1",
urls = ["https://github.com/Maratyszcza/FXdiv/archive/b408327ac2a15ec3e43352421954f5b1967701d1.zip"],
)
# LINT.ThenChange(cmake/DownloadFXdiv.cmake)

# LINT.IfChange
# pthreadpool library, used for parallelization
Expand All @@ -78,6 +86,7 @@ http_archive(
)
# LINT.ThenChange(cmake/DownloadPThreadPool.cmake)

# LINT.IfChange
# cpuinfo library, used for detecting processor characteristics
http_archive(
name = "cpuinfo",
Expand All @@ -87,6 +96,19 @@ http_archive(
"https://github.com/pytorch/cpuinfo/archive/d6860c477c99f1fce9e28eb206891af3c0e1a1d7.zip"
],
)
# LINT.ThenChange(cmake/DownloadCpuinfo.cmake)

# LINT.IfChange
# KleidiAI library, used for Arm microkernels.
# Pinned to one commit: the hash appears in `strip_prefix` and twice in the
# archive URL, so update all occurrences together, along with `sha256`.
http_archive(
name = "KleidiAI",
sha256 = "39b26d8840ec719afaa480b0622a77952d0f22dbb8e8ba58ec9f93e39895a205",
strip_prefix = "kleidiai-1976f8661e8d5aa7d4cdca0f3d2a915e5ecb4c53",
urls = [
"https://gitlab.arm.com/kleidi/kleidiai/-/archive/1976f8661e8d5aa7d4cdca0f3d2a915e5ecb4c53/kleidiai-1976f8661e8d5aa7d4cdca0f3d2a915e5ecb4c53.zip"
],
)
# LINT.ThenChange(cmake/DownloadKleidiAI.cmake)

# Ruy library, used to benchmark against
http_archive(
Expand Down
Loading

0 comments on commit 90e114d

Please sign in to comment.