From d827213bdb46b40ec1da66c8486b1865e3903dd3 Mon Sep 17 00:00:00 2001 From: makslevental Date: Sat, 19 Oct 2024 15:37:56 -0400 Subject: [PATCH] [XRT-LITE] add ability to configure NPU power mode --- build_tools/ci/run_matmul_test.sh | 20 ++++++- .../src/iree-amd-aie/driver/xrt-lite/api.h | 2 + .../iree-amd-aie/driver/xrt-lite/device.cc | 26 +++++++++- .../src/iree-amd-aie/driver/xrt-lite/device.h | 1 + .../xrt-lite/registration/driver_module.c | 35 +++++++++++-- .../driver/xrt-lite/shim/linux/kmq/device.cpp | 52 +++++++++++++++++++ .../driver/xrt-lite/shim/linux/kmq/device.h | 8 +++ .../driver/xrt-lite/shim/linux/kmq/hwctx.cpp | 1 + .../driver/xrt-lite/shim/linux/kmq/hwctx.h | 4 +- 9 files changed, 138 insertions(+), 11 deletions(-) diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh index 1ed121c44..0ace878f4 100755 --- a/build_tools/ci/run_matmul_test.sh +++ b/build_tools/ci/run_matmul_test.sh @@ -796,6 +796,7 @@ fi # note this will not actually show any devices because --xrt_lite_n_core_rows --xrt_lite_n_core_cols are not passed # which i have omitted to make the conditional slightly more succinct if [[ $($IREE_INSTALL_DIR/bin/iree-benchmark-module --dump_devices | grep xrt-lite) ]]; then + $IREE_INSTALL_DIR/bin/iree-benchmark-module \ --module=$OUTPUT_DIR/mm_test1_bf16_f32_m64_n64_k64.vmfb \ --function=matmul_64x64_64xbf16_ \ @@ -804,7 +805,24 @@ if [[ $($IREE_INSTALL_DIR/bin/iree-benchmark-module --dump_devices | grep xrt-li --device=xrt-lite \ --benchmark_repetitions=10 \ --xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \ - --xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS + --xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS \ + + # TURBO POWER!!!!!!!!!!!!!!!!! + # check if sudo + sudo -nv 2>&1 + if [ $? -eq 0 ]; then + sudo $IREE_INSTALL_DIR/bin/iree-benchmark-module \ + --module=$OUTPUT_DIR/mm_test1_bf16_f32_m64_n64_k64.vmfb \ + --function=matmul_64x64_64xbf16_ \ + --input=64x64xbf16 \ + --input=64x64xbf16 \ + --device=xrt-lite \ + --benchmark_repetitions=10 \ + --xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \ + --xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS \ + --xrt_lite_power_mode=turbo + fi + fi echo "$MATMUL_TESTS_RUN matmul tests run!" diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/api.h b/runtime/src/iree-amd-aie/driver/xrt-lite/api.h index c969388ba..6696940c4 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/api.h +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/api.h @@ -7,12 +7,14 @@ #ifndef IREE_AMD_AIE_DRIVER_XRT_LITE_API_H_ #define IREE_AMD_AIE_DRIVER_XRT_LITE_API_H_ +#include "iree-amd-aie/driver/xrt-lite/shim/linux/kmq/amdxdna_accel.h" #include "iree/base/api.h" #include "iree/hal/api.h" struct iree_hal_xrt_lite_device_params { int32_t n_core_rows; int32_t n_core_cols; + iree_string_view_t power_mode; }; IREE_API_EXPORT void iree_hal_xrt_lite_device_options_initialize( diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/device.cc b/runtime/src/iree-amd-aie/driver/xrt-lite/device.cc index 323bd4aaa..1033ddd2f 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/device.cc +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/device.cc @@ -30,8 +30,26 @@ iree_hal_xrt_lite_device::iree_hal_xrt_lite_device( iree_hal_resource_initialize(&iree_hal_xrt_lite_device_vtable, &resource); this->host_allocator = host_allocator; - shim_device = - new shim_xdna::device(options->n_core_rows, options->n_core_cols); + this->power_mode = options->power_mode; + if (iree_string_view_equal(power_mode, IREE_SV("default"))) { + shim_device = new shim_xdna::device( + options->n_core_rows, options->n_core_cols, POWER_MODE_DEFAULT); + } else if (iree_string_view_equal(power_mode, IREE_SV("low"))) { + shim_device = new shim_xdna::device(options->n_core_rows, + options->n_core_cols, POWER_MODE_LOW); + } else if (iree_string_view_equal(power_mode, IREE_SV("medium"))) { + shim_device = new shim_xdna::device( + options->n_core_rows, options->n_core_cols, POWER_MODE_MEDIUM); + } else if (iree_string_view_equal(power_mode, IREE_SV("high"))) { + shim_device = new shim_xdna::device(options->n_core_rows, + options->n_core_cols, POWER_MODE_HIGH); + } else if (iree_string_view_equal(power_mode, IREE_SV("turbo"))) { + shim_device = new shim_xdna::device(options->n_core_rows, + options->n_core_cols, POWER_MODE_TURBO); + } else { + shim_device = + new shim_xdna::device(options->n_core_rows, options->n_core_cols); + } iree_status_t status = iree_hal_xrt_lite_allocator_create( host_allocator, shim_device, &device_allocator); @@ -202,6 +220,10 @@ static void iree_hal_xrt_lite_device_destroy(iree_hal_device_t* base_device) { base_device, iree_hal_xrt_lite_device_vtable, iree_hal_xrt_lite_device); iree_hal_allocator_release(device->device_allocator); + if (!iree_string_view_is_empty(device->power_mode) && + !iree_string_view_equal(device->power_mode, IREE_SV("default"))) { + device->shim_device->set_power_mode(POWER_MODE_DEFAULT); + } delete device->shim_device; iree_allocator_free(device->host_allocator, device); diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/device.h b/runtime/src/iree-amd-aie/driver/xrt-lite/device.h index ad3141e88..e7c7dd176 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/device.h +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/device.h @@ -25,6 +25,7 @@ struct iree_hal_xrt_lite_device { // should come last; see the definition of total_size below in // iree_hal_xrt_lite_device_create iree_string_view_t identifier; + iree_string_view_t power_mode; iree_hal_xrt_lite_device(const iree_hal_xrt_lite_device_params* options, iree_allocator_t host_allocator); diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/registration/driver_module.c b/runtime/src/iree-amd-aie/driver/xrt-lite/registration/driver_module.c index 72f4841b1..894d23a3b 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/registration/driver_module.c +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/registration/driver_module.c @@ -14,11 +14,15 @@ IREE_FLAG(int32_t, xrt_lite_n_core_rows, 0, "Number of core rows to use on NPU."); IREE_FLAG(int32_t, xrt_lite_n_core_cols, 0, "Number of core cols to use on NPU."); +// see shim/linux/kmq/amdxdna_accel.h#L460 for options +IREE_FLAG(string, xrt_lite_power_mode, "", "Set the power mode of the NPU."); static const iree_string_view_t key_xrt_lite_n_core_rows = iree_string_view_literal("xrt_lite_n_core_rows"); static const iree_string_view_t key_xrt_lite_n_core_cols = iree_string_view_literal("xrt_lite_n_core_cols"); +static const iree_string_view_t key_xrt_lite_power_mode = + iree_string_view_literal("xrt_lite_power_mode"); static iree_status_t iree_hal_xrt_lite_driver_factory_enumerate( void* self, iree_host_size_t* out_driver_info_count, @@ -46,6 +50,13 @@ static iree_status_t iree_hal_xrt_lite_driver_parse_flags( IREE_RETURN_AND_END_ZONE_IF_ERROR( z0, iree_string_pair_builder_add_int32(builder, key_xrt_lite_n_core_cols, FLAG_xrt_lite_n_core_cols)); + iree_string_view_t power_mode = IREE_SV(FLAG_xrt_lite_power_mode); + if (!iree_string_view_is_empty(power_mode)) { + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, iree_string_pair_builder_add( + builder, + iree_make_string_pair(key_xrt_lite_power_mode, power_mode))); + } IREE_TRACE_ZONE_END(z0); return iree_ok_status(); @@ -68,14 +79,14 @@ static iree_status_t iree_hal_xrt_lite_driver_populate_options( IREE_TRACE_ZONE_END(z0); return iree_make_status( IREE_STATUS_FAILED_PRECONDITION, - "Option 'key_xrt_lite_n_core_rows' expected to be int. Got: '%.*s'", + "Option 'xrt_lite_n_core_rows' expected to be int. Got: '%.*s'", (int)value.size, value.data); } if (ivalue <= 0) { IREE_TRACE_ZONE_END(z0); return iree_make_status( IREE_STATUS_FAILED_PRECONDITION, - "Option 'key_xrt_lite_n_core_rows' expected to be > 0. Got: '%.*s'", + "Option 'xrt_lite_n_core_rows' expected to be > 0. Got: '%.*s'", (int)value.size, value.data); } device_params->n_core_rows = ivalue; @@ -84,20 +95,34 @@ static iree_status_t iree_hal_xrt_lite_driver_populate_options( IREE_TRACE_ZONE_END(z0); return iree_make_status( IREE_STATUS_FAILED_PRECONDITION, - "Option 'key_xrt_lite_n_core_cols' expected to be int. Got: '%.*s'", + "Option 'xrt_lite_n_core_cols' expected to be int. Got: '%.*s'", (int)value.size, value.data); } if (ivalue <= 0) { IREE_TRACE_ZONE_END(z0); return iree_make_status( IREE_STATUS_FAILED_PRECONDITION, - "Option 'key_xrt_lite_n_core_cols' expected to be > 0. Got: '%.*s'", + "Option 'xrt_lite_n_core_cols' expected to be > 0. Got: '%.*s'", (int)value.size, value.data); } device_params->n_core_cols = ivalue; + } else if (iree_string_view_equal(key, key_xrt_lite_power_mode)) { + if (!(iree_string_view_equal(value, IREE_SV("default")) || + iree_string_view_equal(value, IREE_SV("low")) || + iree_string_view_equal(value, IREE_SV("medium")) || + iree_string_view_equal(value, IREE_SV("high")) || + iree_string_view_equal(value, IREE_SV("turbo")))) { + IREE_TRACE_ZONE_END(z0); + return iree_make_status( + IREE_STATUS_FAILED_PRECONDITION, + "Option 'xrt_lite_power_mode' expected to be default | low | " + "medium | high | turbo. Got: '%.*s'", + (int)value.size, value.data); + } + device_params->power_mode = value; } else { IREE_TRACE_ZONE_END(z0); - return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, + return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "Unrecognized options: %.*s", (int)key.size, key.data); } diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.cpp b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.cpp index 8b71d5f38..c72d764f2 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.cpp +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.cpp @@ -146,6 +146,15 @@ device::device(uint32_t n_rows, uint32_t n_cols) SHIM_DEBUG("Created KMQ device n_rows %d n_cols %d", n_rows, n_cols); } +device::device(uint32_t n_rows, uint32_t n_cols, + amdxdna_power_mode_type power_mode) + : device(n_rows, n_cols) { + set_power_mode(power_mode); + SHIM_DEBUG("Created KMQ device n_rows %d n_cols %d with power_mode %s", + n_rows, n_cols, + stringify_amdxdna_power_mode_type(power_mode).c_str()); +} + device::~device() { SHIM_DEBUG("Destroying KMQ device"); } const pdev &device::get_pdev() const { return m_pdev; } @@ -249,6 +258,31 @@ void device::write_aie_reg(uint16_t col, uint16_t row, uint32_t reg_addr, m_pdev.ioctl(DRM_IOCTL_AMDXDNA_SET_STATE, &arg); } +amdxdna_power_mode_type device::get_power_mode() const { + amdxdna_drm_get_power_mode state; + amdxdna_drm_get_info arg = {.param = DRM_AMDXDNA_GET_POWER_MODE, + .buffer_size = sizeof(state), + .buffer = reinterpret_cast(&state)}; + + m_pdev.ioctl(DRM_IOCTL_AMDXDNA_GET_INFO, &arg); + return static_cast(state.power_mode); +} + +void device::set_power_mode(amdxdna_power_mode_type mode) const { + amdxdna_drm_set_power_mode state; + state.power_mode = mode; + amdxdna_drm_set_state arg = {.param = DRM_AMDXDNA_SET_POWER_MODE, + .buffer_size = sizeof(state), + .buffer = reinterpret_cast(&state)}; + if (::ioctl(m_pdev.m_dev_fd, DRM_IOCTL_AMDXDNA_SET_STATE, &arg) == -1) { + shim_err( + errno, + "DRM_AMDXDNA_SET_POWER_MODE failed; probably you need sudo privileges"); + } + SHIM_DEBUG("set power_mode to %s", + stringify_amdxdna_power_mode_type(mode).c_str()); +} + std::string read_sysfs(const std::string &filename) { std::ifstream file(filename); std::string line; @@ -274,4 +308,22 @@ std::filesystem::path find_npu_device() { shim_err(errno, "No npu device found"); } +std::string stringify_amdxdna_power_mode_type( + amdxdna_power_mode_type power_mode) { + switch (power_mode) { + case POWER_MODE_DEFAULT: + return {"DEFAULT"}; + case POWER_MODE_LOW: + return {"LOW"}; + case POWER_MODE_MEDIUM: + return {"MEDIUM"}; + case POWER_MODE_HIGH: + return {"HIGH"}; + case POWER_MODE_TURBO: + return {"TURBO"}; + default: + llvm::report_fatal_error("unknown power mode"); + } +} + } // namespace shim_xdna diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.h b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.h index 8ace4e79d..f646db0e4 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.h +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.h @@ -7,6 +7,7 @@ #include #include +#include "amdxdna_accel.h" #include "fence.h" #include "xrt_mem.h" @@ -33,6 +34,7 @@ struct device { uint32_t n_cols; device(uint32_t n_rows, uint32_t n_cols); + device(uint32_t n_rows, uint32_t n_cols, amdxdna_power_mode_type power_mode); ~device(); std::unique_ptr import_bo(int ehdl) const; @@ -58,12 +60,18 @@ struct device { void write_aie_reg(uint16_t col, uint16_t row, uint32_t reg_addr, uint32_t reg_val); + // TODO(max): hide amdxdna_accel enums so they don't leak + amdxdna_power_mode_type get_power_mode() const; + void set_power_mode(amdxdna_power_mode_type mode) const; + std::unique_ptr create_fence(fence_handle::access_mode); std::unique_ptr import_fence(pid_t, int); }; std::string read_sysfs(const std::string &filename); std::filesystem::path find_npu_device(); +std::string stringify_amdxdna_power_mode_type( + amdxdna_power_mode_type power_mode); } // namespace shim_xdna diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.cpp b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.cpp index 20a94efd7..f4be8c531 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.cpp +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.cpp @@ -6,6 +6,7 @@ #include #include +#include "amdxdna_accel.h" #include "bo.h" #include "hwq.h" #include "shim_debug.h" diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.h b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.h index 7a169e270..4ba5437a7 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.h +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.h @@ -6,7 +6,6 @@ #include -#include "amdxdna_accel.h" #include "device.h" namespace shim_xdna { @@ -54,8 +53,7 @@ struct hw_ctx { std::unique_ptr q, const std::vector &pdi, const std::string &cu_name, uint32_t n_rows, uint32_t n_cols); hw_ctx(device &dev, const std::vector &pdi, - const std::string &cu_name, - uint32_t n_rows, uint32_t n_cols, + const std::string &cu_name, uint32_t n_rows, uint32_t n_cols, const std::map &qos = {}); ~hw_ctx(); // no copying