Skip to content

Commit

Permalink
Add dependencies to the KleidiAI library to both the BUILD and `CMakeLists.txt` files.
Browse files Browse the repository at this point in the history

PiperOrigin-RevId: 633527510
  • Loading branch information
gonnet authored and xnnpack-bot committed Jun 11, 2024
1 parent 56fa083 commit bf8c8a3
Show file tree
Hide file tree
Showing 42 changed files with 2,007 additions and 124 deletions.
39 changes: 36 additions & 3 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
load("@bazel_skylib//lib:selects.bzl", "selects") # buildifier: disable=out-of-order-load
load("@rules_python//python:py_binary.bzl", "py_binary")
load(":build_defs.bzl", "xnnpack_aggregate_library", "xnnpack_cc_library", "xnnpack_gcc_std_copts", "xnnpack_min_size_copts", "xnnpack_msvc_std_copts", "xnnpack_slinky_defines", "xnnpack_slinky_deps", "xnnpack_slinky_srcs", "xnnpack_std_cxxopts", "xnnpack_transitive_source_list", "xnnpack_visibility")
load(":build_defs.bzl", "xnnpack_aggregate_library", "xnnpack_cc_library", "xnnpack_gcc_std_copts", "xnnpack_if_kleidiai_enabled", "xnnpack_kleidiai_defines", "xnnpack_min_size_copts", "xnnpack_msvc_std_copts", "xnnpack_slinky_defines", "xnnpack_slinky_deps", "xnnpack_slinky_srcs", "xnnpack_std_cxxopts", "xnnpack_transitive_source_list", "xnnpack_visibility")
load("//gen:microkernels.bzl", "AARCH32_ASM_MICROKERNEL_SRCS", "AARCH32_JIT_MICROKERNEL_SRCS", "AARCH64_ASM_MICROKERNEL_SRCS", "AARCH64_JIT_MICROKERNEL_SRCS", "ALL_ARMSIMD32_MICROKERNEL_SRCS", "ALL_AVX2_MICROKERNEL_SRCS", "ALL_AVX512AMX_MICROKERNEL_SRCS", "ALL_AVX512FP16_MICROKERNEL_SRCS", "ALL_AVX512F_MICROKERNEL_SRCS", "ALL_AVX512SKX_MICROKERNEL_SRCS", "ALL_AVX512VBMI_MICROKERNEL_SRCS", "ALL_AVX512VNNIGFNI_MICROKERNEL_SRCS", "ALL_AVX512VNNI_MICROKERNEL_SRCS", "ALL_AVXVNNI_MICROKERNEL_SRCS", "ALL_AVX_MICROKERNEL_SRCS", "ALL_F16C_MICROKERNEL_SRCS", "ALL_FMA3_MICROKERNEL_SRCS", "ALL_FMA_MICROKERNEL_SRCS", "ALL_FP16ARITH_MICROKERNEL_SRCS", "ALL_HEXAGON_MICROKERNEL_SRCS", "ALL_HVX_MICROKERNEL_SRCS", "ALL_NEONBF16_AARCH64_MICROKERNEL_SRCS", "ALL_NEONBF16_MICROKERNEL_SRCS", "ALL_NEONDOTFP16ARITH_MICROKERNEL_SRCS", "ALL_NEONDOT_AARCH64_MICROKERNEL_SRCS", "ALL_NEONDOT_MICROKERNEL_SRCS", "ALL_NEONFMA_AARCH64_MICROKERNEL_SRCS", "ALL_NEONFMA_MICROKERNEL_SRCS", "ALL_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS", "ALL_NEONFP16ARITH_MICROKERNEL_SRCS", "ALL_NEONFP16_MICROKERNEL_SRCS", "ALL_NEONI8MM_MICROKERNEL_SRCS", "ALL_NEONV8_MICROKERNEL_SRCS", "ALL_NEON_AARCH64_MICROKERNEL_SRCS", "ALL_NEON_MICROKERNEL_SRCS", "ALL_RVVFP16ARITH_MICROKERNEL_SRCS", "ALL_RVV_MICROKERNEL_SRCS", "ALL_SCALAR_MICROKERNEL_SRCS", "ALL_SSE2_MICROKERNEL_SRCS", "ALL_SSE41_MICROKERNEL_SRCS", "ALL_SSE_MICROKERNEL_SRCS", "ALL_SSSE3_MICROKERNEL_SRCS", "ALL_WASMRELAXEDSIMD_MICROKERNEL_SRCS", "ALL_WASMSIMD_MICROKERNEL_SRCS", "ALL_WASM_MICROKERNEL_SRCS", "WASM32_ASM_MICROKERNEL_SRCS", "WASM32_JIT_MICROKERNEL_SRCS", "WASMRELAXEDSIMD32_JIT_MICROKERNEL_SRCS", "WASMSIMD32_JIT_MICROKERNEL_SRCS")

licenses(["notice"])
Expand Down Expand Up @@ -135,6 +135,7 @@ MICROKERNEL_HDRS = [
"src/xnnpack/lut.h",
"src/xnnpack/maxpool.h",
"src/xnnpack/packb.h",
"src/xnnpack/packq.h",
"src/xnnpack/packw.h",
"src/xnnpack/packx.h",
"src/xnnpack/pad.h",
Expand Down Expand Up @@ -725,9 +726,10 @@ xnnpack_cc_library(
"src/amalgam/gen/neon-aarch64.c",
"src/amalgam/gen/neon.c",
],
defines = xnnpack_kleidiai_defines(),
gcc_copts = xnnpack_gcc_std_copts(),
msvc_copts = xnnpack_msvc_std_copts(),
deps = MICROKERNEL_DEPS,
deps = MICROKERNEL_DEPS + xnnpack_if_kleidiai_enabled(["@KleidiAI:kleidiai_neon"]),
)

xnnpack_cc_library(
Expand All @@ -739,9 +741,12 @@ xnnpack_cc_library(
],
aarch32_srcs = ALL_NEON_MICROKERNEL_SRCS,
aarch64_srcs = ALL_NEON_MICROKERNEL_SRCS + ALL_NEON_AARCH64_MICROKERNEL_SRCS,
defines = xnnpack_kleidiai_defines(),
gcc_copts = xnnpack_gcc_std_copts(),
msvc_copts = xnnpack_msvc_std_copts(),
deps = MICROKERNEL_DEPS,
deps = MICROKERNEL_DEPS + xnnpack_if_kleidiai_enabled([
"@KleidiAI:kleidiai_neon",
]),
)

xnnpack_cc_library(
Expand Down Expand Up @@ -2564,6 +2569,18 @@ config_setting(
define_values = {"xnn_enable_cpuinfo": "false"},
)

# Enables usage of the KleidiAI library.
# Matches only when the build defines `xnn_enable_kleidiai` with the value
# "true" (e.g. `--define xnn_enable_kleidiai=true`).
config_setting(
name = "xnn_enable_kleidiai_explicit_true",
define_values = {"xnn_enable_kleidiai": "true"},
)

# Disables usage of the KleidiAI library.
# Matches only when the build defines `xnn_enable_kleidiai` with the value
# "false" (e.g. `--define xnn_enable_kleidiai=false`).
config_setting(
name = "xnn_enable_kleidiai_explicit_false",
define_values = {"xnn_enable_kleidiai": "false"},
)

# Enables usage of assembly kernels.
config_setting(
name = "xnn_enable_assembly_explicit_true",
Expand Down Expand Up @@ -2946,6 +2963,22 @@ alias(
}),
)

# Set of configurations in which KleidiAI is used when `xnn_enable_kleidiai`
# has not been defined explicitly. The group matches if any listed setting
# matches; the list currently contains only `//build_config:aarch64`.
selects.config_setting_group(
name = "kleidiai_enabled_by_default",
match_any = [
"//build_config:aarch64",
],
)

# Single condition to select on when deciding whether to use KleidiAI.
#   * explicit "true":  resolves to `:xnn_enable_kleidiai_explicit_true`,
#     which matches in that configuration.
#   * explicit "false": also resolves to `:xnn_enable_kleidiai_explicit_true`;
#     that setting cannot match here because the define is "false", so the
#     alias evaluates as not enabled.
#   * neither defined:  falls back to `:kleidiai_enabled_by_default`.
alias(
name = "kleidiai_enabled",
actual = select({
":xnn_enable_kleidiai_explicit_true": ":xnn_enable_kleidiai_explicit_true",
":xnn_enable_kleidiai_explicit_false": ":xnn_enable_kleidiai_explicit_true",
"//conditions:default": ":kleidiai_enabled_by_default",
}),
)

selects.config_setting_group(
name = "assembly_enabled_by_default",
match_any = [
Expand Down
72 changes: 72 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ OPTION(USE_GNU_SOURCE "Use _GNU_SOURCE macro" OFF)
IF(XNNPACK_BUILD_BENCHMARKS OR XNNPACK_BUILD_TESTS)
SET(XNNPACK_BUILD_ALL_MICROKERNELS ON)
ENDIF()
# Opt-in switch for the KleidiAI integration; it stays OFF unless the user
# turns it on (e.g. -DXNNPACK_ENABLE_KLEIDIAI=ON).
OPTION(XNNPACK_ENABLE_KLEIDIAI "Use KleidiAI GEMM microkernels for Arm" OFF)

# --- [ Determine target processor
IF(CMAKE_OSX_ARCHITECTURES)
Expand Down Expand Up @@ -160,6 +161,7 @@ ADD_COMPILE_DEFINITIONS("XNN_ENABLE_SPARSE=$<BOOL:${XNNPACK_ENABLE_SPARSE}>")
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_GEMM_M_SPECIALIZATION=$<BOOL:${XNNPACK_ENABLE_GEMM_M_SPECIALIZATION}>")
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_DWCONV_MULTIPASS=$<BOOL:${XNNPACK_ENABLE_DWCONV_MULTIPASS}>")
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_HVX=$<BOOL:${XNNPACK_ENABLE_HVX}>")
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_KLEIDIAI=$<BOOL:${XNNPACK_ENABLE_KLEIDIAI}>")

IF(XNNPACK_PLATFORM_JIT STREQUAL "ON" OR XNNPACK_PLATFORM_JIT STREQUAL "OFF")
ADD_COMPILE_DEFINITIONS("XNN_PLATFORM_JIT=$<BOOL:${XNNPACK_PLATFORM_JIT}>")
Expand Down Expand Up @@ -253,6 +255,16 @@ IF(NOT XNNPACK_USE_SYSTEM_LIBS)
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/googlebenchmark-download")
SET(GOOGLEBENCHMARK_SOURCE_DIR "${CMAKE_BINARY_DIR}/googlebenchmark-source" CACHE STRING "Google Benchmark source directory")
ENDIF()

IF(XNNPACK_ENABLE_KLEIDIAI AND NOT DEFINED KLEIDIAI_SOURCE_DIR)
  # Fetch the KleidiAI sources at configure time by generating and building a
  # small helper project from cmake/DownloadKleidiAI.cmake. Skipped entirely
  # when the caller already provides KLEIDIAI_SOURCE_DIR.
  MESSAGE(STATUS "Downloading KleidiAI to ${CMAKE_BINARY_DIR}/kleidiai-source (define KLEIDIAI_SOURCE_DIR to avoid it)")
  CONFIGURE_FILE(cmake/DownloadKleidiAI.cmake "${CMAKE_BINARY_DIR}/kleidiai-download/CMakeLists.txt")

  # Step 1: configure the helper project with the same generator as this build.
  EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
    RESULT_VARIABLE KLEIDIAI_DOWNLOAD_CONFIGURE_RESULT
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/kleidiai-download")
  # Stop here on failure instead of hitting a confusing ADD_SUBDIRECTORY error
  # on a missing source tree later on.
  IF(NOT KLEIDIAI_DOWNLOAD_CONFIGURE_RESULT EQUAL 0)
    MESSAGE(FATAL_ERROR "Failed to configure the KleidiAI download project (result: ${KLEIDIAI_DOWNLOAD_CONFIGURE_RESULT})")
  ENDIF()

  # Step 2: build the helper project, which performs the actual download.
  EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
    RESULT_VARIABLE KLEIDIAI_DOWNLOAD_BUILD_RESULT
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/kleidiai-download")
  IF(NOT KLEIDIAI_DOWNLOAD_BUILD_RESULT EQUAL 0)
    MESSAGE(FATAL_ERROR "Failed to download KleidiAI (result: ${KLEIDIAI_DOWNLOAD_BUILD_RESULT})")
  ENDIF()

  # Cache the location so later configure runs see it as defined and skip the
  # download.
  SET(KLEIDIAI_SOURCE_DIR "${CMAKE_BINARY_DIR}/kleidiai-source" CACHE STRING "kleidiai source directory")
ENDIF()
ENDIF()

# ---[ XNNPACK library
Expand Down Expand Up @@ -1136,6 +1148,34 @@ IF(XNNPACK_BUILD_LIBRARY)
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
ENDIF()

# ---[ Configure KleidiAI
IF(XNNPACK_ENABLE_KLEIDIAI)
  # Only create the `kleidiai` target when one has not been defined already.
  IF(NOT TARGET kleidiai)
    IF(XNNPACK_USE_SYSTEM_LIBS)
      # Wrap a pre-built KleidiAI library in an imported target.
      ADD_LIBRARY(kleidiai SHARED IMPORTED)
      FIND_LIBRARY(KLEIDIAI_LIBRARY kleidiai PATHS "${KLEIDIAI_SOURCE_DIR}/lib")
      IF(NOT KLEIDIAI_LIBRARY)
        MESSAGE(FATAL_ERROR "Cannot find KleidiAI")
      ENDIF()
      TARGET_INCLUDE_DIRECTORIES(kleidiai INTERFACE "${KLEIDIAI_SOURCE_DIR}")
      SET_TARGET_PROPERTIES(kleidiai PROPERTIES
        IMPORTED_LOCATION "${KLEIDIAI_LIBRARY}"
        IMPORTED_IMPLIB "${KLEIDIAI_LIBRARY}")
    ELSE()
      # Build KleidiAI from its source tree, with its own tests switched off.
      SET(KLEIDIAI_BUILD_TESTS OFF CACHE BOOL "")
      ADD_SUBDIRECTORY("${KLEIDIAI_SOURCE_DIR}" "${CMAKE_BINARY_DIR}/kleidiai")
    ENDIF()
  ENDIF()

  # Link KleidiAI into the microkernel libraries and, when built, XNNPACK itself.
  TARGET_LINK_LIBRARIES(microkernels-prod PRIVATE kleidiai)
  IF(XNNPACK_BUILD_ALL_MICROKERNELS)
    TARGET_LINK_LIBRARIES(microkernels-all PRIVATE kleidiai)
  ENDIF()
  IF(XNNPACK_BUILD_LIBRARY)
    TARGET_LINK_LIBRARIES(XNNPACK PRIVATE kleidiai)
  ENDIF()
ENDIF()

# ---[ XNNPACK unit tests
IF(XNNPACK_BUILD_TESTS)
# ---[ Build google test
Expand Down Expand Up @@ -1193,6 +1233,13 @@ IF(XNNPACK_BUILD_TESTS)
TARGET_INCLUDE_DIRECTORIES(convolution-test-helpers PRIVATE include src)
TARGET_LINK_LIBRARIES(convolution-test-helpers PRIVATE fp16)

# Static helper library built from test/packq-microkernel-tester.cc.
ADD_LIBRARY(packq-microkernel-tester STATIC
  test/packq-microkernel-tester.cc)
TARGET_INCLUDE_DIRECTORIES(packq-microkernel-tester PRIVATE
  .
  include
  src
  test)
TARGET_LINK_LIBRARIES(packq-microkernel-tester PRIVATE
  XNNPACK
  fp16
  pthreadpool
  GTest::gtest)
# KleidiAI is linked only when the integration is enabled.
IF(XNNPACK_ENABLE_KLEIDIAI)
  TARGET_LINK_LIBRARIES(packq-microkernel-tester PRIVATE kleidiai)
ENDIF()

IF(XNNPACK_BUILD_LIBRARY)
# ---[ Build size tests
ADD_EXECUTABLE(operator-size-test test/operator-size.c)
Expand Down Expand Up @@ -3340,6 +3387,15 @@ IF(XNNPACK_BUILD_TESTS)
TARGET_LINK_LIBRARIES(x32-packb-test PRIVATE hardware-config logging microkernels-all packing)
ADD_TEST(NAME x32-packb-test COMMAND x32-packb-test)

# x8-packq unit test, registered with CTest under the same name.
ADD_EXECUTABLE(x8-packq-test test/x8-packq.cc)
TARGET_INCLUDE_DIRECTORIES(x8-packq-test PRIVATE include src test)
TARGET_LINK_LIBRARIES(x8-packq-test PRIVATE
  pthreadpool
  GTest::gtest
  GTest::gtest_main
  packq-microkernel-tester
  hardware-config
  logging
  microkernels-all
  packing)
# KleidiAI is linked only when the integration is enabled.
IF(XNNPACK_ENABLE_KLEIDIAI)
  TARGET_LINK_LIBRARIES(x8-packq-test PRIVATE kleidiai)
ENDIF()
ADD_TEST(NAME x8-packq-test COMMAND x8-packq-test)

ADD_EXECUTABLE(x8-packw-test test/x8-packw.cc)
TARGET_INCLUDE_DIRECTORIES(x8-packw-test PRIVATE include src test)
TARGET_LINK_LIBRARIES(x8-packw-test PRIVATE pthreadpool GTest::gtest GTest::gtest_main)
Expand Down Expand Up @@ -3519,6 +3575,14 @@ IF(XNNPACK_BUILD_BENCHMARKS)
TARGET_LINK_LIBRARIES(bench-utils PRIVATE logging memory)
ENDIF()

# Helper libraries
# Static library built from bench/packq-benchmark.cc.
ADD_LIBRARY(packq-benchmark STATIC
  bench/packq-benchmark.cc)
TARGET_INCLUDE_DIRECTORIES(packq-benchmark PRIVATE
  .
  include
  src
  bench)
TARGET_LINK_LIBRARIES(packq-benchmark PRIVATE
  XNNPACK
  benchmark::benchmark
  bench-utils)
# KleidiAI is linked only when the integration is enabled.
IF(XNNPACK_ENABLE_KLEIDIAI)
  TARGET_LINK_LIBRARIES(packq-benchmark PRIVATE kleidiai)
ENDIF()

# ---[ Build accuracy microbenchmarks
ADD_EXECUTABLE(f16-exp-ulp-eval eval/f16-exp-ulp.cc)
TARGET_INCLUDE_DIRECTORIES(f16-exp-ulp-eval PRIVATE . src)
Expand Down Expand Up @@ -4494,6 +4558,14 @@ IF(XNNPACK_BUILD_BENCHMARKS)
TARGET_LINK_LIBRARIES(x24-transpose-bench PRIVATE benchmark::benchmark pthreadpool)
TARGET_LINK_LIBRARIES(x24-transpose-bench PRIVATE bench-utils hardware-config logging microkernels-all microparams-init)

# x8-packq microbenchmark executable, built from bench/x8-packq.cc.
ADD_EXECUTABLE(x8-packq-bench bench/x8-packq.cc)
TARGET_INCLUDE_DIRECTORIES(x8-packq-bench PRIVATE . include src)
TARGET_LINK_LIBRARIES(x8-packq-bench PRIVATE benchmark::benchmark pthreadpool)
# KleidiAI is linked only when the integration is enabled. Note that this
# conditional link is issued before the remaining libraries below.
IF(XNNPACK_ENABLE_KLEIDIAI)
TARGET_LINK_LIBRARIES(x8-packq-bench PRIVATE kleidiai)
ENDIF()
TARGET_LINK_LIBRARIES(x8-packq-bench PRIVATE packq-benchmark bench-utils hardware-config logging microkernels-all packing)

ADD_EXECUTABLE(x8-packw-bench bench/x8-packw.cc)
TARGET_INCLUDE_DIRECTORIES(x8-packw-bench PRIVATE . include src)
TARGET_LINK_LIBRARIES(x8-packw-bench PRIVATE benchmark::benchmark pthreadpool)
Expand Down
22 changes: 22 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,27 @@ http_archive(
],
)

# LINT.IfChange
# Google Test framework, used by most unit-tests.
http_archive(
name = "com_google_googletest",
sha256 = "5cb522f1427558c6df572d6d0e1bf0fd076428633d080e88ad5312be0b6a8859",
strip_prefix = "googletest-e23cdb78e9fef1f69a9ef917f447add5638daf2a",
urls = ["https://github.com/google/googletest/archive/e23cdb78e9fef1f69a9ef917f447add5638daf2a.zip"],
)
# LINT.ThenChange(cmake/DownloadGoogleTest.cmake)

# LINT.IfChange
# Google Benchmark library, used in micro-benchmarks.
http_archive(
name = "com_google_benchmark",
sha256 = "1ba14374fddcd9623f126b1a60945e4deac4cdc4fb25a5f25e7f779e36f2db52",
strip_prefix = "benchmark-d2a8a4ee41b923876c034afb939c4fc03598e622",
urls = ["https://github.com/google/benchmark/archive/d2a8a4ee41b923876c034afb939c4fc03598e622.zip"],
)
# LINT.ThenChange(cmake/DownloadGoogleBenchmark.cmake)

# LINT.IfChange
# FP16 library, used for half-precision conversions
http_archive(
name = "FP16",
Expand All @@ -59,14 +64,17 @@ http_archive(
"https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip",
],
)
# LINT.ThenChange(cmake/DownloadFP16.cmake)

# LINT.IfChange
# FXdiv library, used for repeated integer division by the same factor
http_archive(
name = "FXdiv",
sha256 = "ab7dfb08829bee33dca38405d647868fb214ac685e379ec7ef2bebcd234cd44d",
strip_prefix = "FXdiv-b408327ac2a15ec3e43352421954f5b1967701d1",
urls = ["https://github.com/Maratyszcza/FXdiv/archive/b408327ac2a15ec3e43352421954f5b1967701d1.zip"],
)
# LINT.ThenChange(cmake/DownloadFXdiv.cmake)

# LINT.IfChange
# pthreadpool library, used for parallelization
Expand All @@ -78,6 +86,7 @@ http_archive(
)
# LINT.ThenChange(cmake/DownloadPThreadPool.cmake)

# LINT.IfChange
# cpuinfo library, used for detecting processor characteristics
http_archive(
name = "cpuinfo",
Expand All @@ -87,6 +96,19 @@ http_archive(
"https://github.com/pytorch/cpuinfo/archive/d6860c477c99f1fce9e28eb206891af3c0e1a1d7.zip"
],
)
# LINT.ThenChange(cmake/DownloadCpuinfo.cmake)

# LINT.IfChange
# KleidiAI library, used for Arm microkernels (archive pinned by commit hash and sha256).
http_archive(
    name = "KleidiAI",
    sha256 = "39b26d8840ec719afaa480b0622a77952d0f22dbb8e8ba58ec9f93e39895a205",
    strip_prefix = "kleidiai-1976f8661e8d5aa7d4cdca0f3d2a915e5ecb4c53",
    urls = ["https://gitlab.arm.com/kleidi/kleidiai/-/archive/1976f8661e8d5aa7d4cdca0f3d2a915e5ecb4c53/kleidiai-1976f8661e8d5aa7d4cdca0f3d2a915e5ecb4c53.zip"],
)
# LINT.ThenChange(cmake/DownloadKleidiAI.cmake)

# Ruy library, used to benchmark against
http_archive(
Expand Down
31 changes: 28 additions & 3 deletions bench/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,18 @@ load(

MICROKERNEL_BENCHMARK_DEPS = [
":bench_utils",
"@FP16",
"//:aligned_allocator",
"//:test_microkernels",
"//:common",
"//:enable_assembly",
"//:jit",
"//:microkernels_h",
"//:microparams_init",
"//:microparams",
"//:packing",
"//:params",
"//:microparams",
"//:microparams_init",
"//:xnnpack_h",
"@FP16",
]

OPERATOR_BENCHMARK_DEPS = [
Expand Down Expand Up @@ -961,6 +961,31 @@ xnnpack_benchmark(
deps = MICROKERNEL_BENCHMARK_DEPS,
)

# Library exposing packq-benchmark.h, built from packq-benchmark.cc and
# bgemm.h. It depends on the common microkernel benchmark dependencies plus
# Google Benchmark.
xnnpack_cc_library(
name = "packq_benchmark",
srcs = [
"bgemm.h",
"packq-benchmark.cc",
],
hdrs = ["packq-benchmark.h"],
deps = MICROKERNEL_BENCHMARK_DEPS + [
"@com_google_benchmark//:benchmark",
],
)

# x8-packq benchmark target, built from x8-packq.cc and bgemm.h. On top of the
# common microkernel benchmark dependencies it uses the `:packq_benchmark`
# library and the `//:allocator` and `//:math` targets.
xnnpack_benchmark(
name = "x8_packq_bench",
srcs = [
"bgemm.h",
"x8-packq.cc",
],
deps = MICROKERNEL_BENCHMARK_DEPS + [
":packq_benchmark",
"//:allocator",
"//:math",
],
)

xnnpack_benchmark(
name = "x8_packw_bench",
srcs = [
Expand Down
Loading

0 comments on commit bf8c8a3

Please sign in to comment.