Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[XRT-LITE] add ability to configure NPU power mode #851

Merged
merged 1 commit into from
Oct 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion build_tools/ci/run_matmul_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,7 @@ fi
# Note: this will not actually list any devices, because --xrt_lite_n_core_rows and --xrt_lite_n_core_cols are not passed;
# I have omitted them to keep the conditional slightly more succinct.
if [[ $($IREE_INSTALL_DIR/bin/iree-benchmark-module --dump_devices | grep xrt-lite) ]]; then

$IREE_INSTALL_DIR/bin/iree-benchmark-module \
--module=$OUTPUT_DIR/mm_test1_bf16_f32_m64_n64_k64.vmfb \
--function=matmul_64x64_64xbf16_ \
Expand All @@ -804,7 +805,25 @@ if [[ $($IREE_INSTALL_DIR/bin/iree-benchmark-module --dump_devices | grep xrt-li
--device=xrt-lite \
--benchmark_repetitions=10 \
--xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \
--xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS
--xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS \

# TURBO POWER!!!!!!!!!!!!!!!!!
# Re-run the same benchmark with --xrt_lite_power_mode=turbo. The run goes
# through sudo (the driver's failure message suggests changing the power mode
# needs elevated privileges), so it is attempted only when sudo can be used
# non-interactively: `sudo -nv` validates credentials without prompting and
# fails otherwise.
set +o pipefail
sudo -nv 2>&1 && has_sudo="true" || has_sudo="false"
set -o pipefail
# Expand the variable: a bare `has_sudo` is a literal string that never equals
# "true", which would silently skip the turbo run on every machine.
if [[ "$has_sudo" == "true" ]]; then
  sudo $IREE_INSTALL_DIR/bin/iree-benchmark-module \
    --module=$OUTPUT_DIR/mm_test1_bf16_f32_m64_n64_k64.vmfb \
    --function=matmul_64x64_64xbf16_ \
    --input=64x64xbf16 \
    --input=64x64xbf16 \
    --device=xrt-lite \
    --benchmark_repetitions=10 \
    --xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \
    --xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS \
    --xrt_lite_power_mode=turbo
fi

fi

echo "$MATMUL_TESTS_RUN matmul tests run!"
Expand Down
2 changes: 2 additions & 0 deletions runtime/src/iree-amd-aie/driver/xrt-lite/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@
#ifndef IREE_AMD_AIE_DRIVER_XRT_LITE_API_H_
#define IREE_AMD_AIE_DRIVER_XRT_LITE_API_H_

#include "iree-amd-aie/driver/xrt-lite/shim/linux/kmq/amdxdna_accel.h"
#include "iree/base/api.h"
#include "iree/hal/api.h"

// Parameters used to configure an xrt-lite HAL device at creation time.
struct iree_hal_xrt_lite_device_params {
// Number of core rows to use on the NPU.
int32_t n_core_rows;
// Number of core cols to use on the NPU.
int32_t n_core_cols;
// Requested NPU power mode: one of "default", "low", "medium", "high" or
// "turbo"; may be empty when no power mode was requested.
// NOTE(review): this is a string view rather than an owned copy - confirm the
// backing storage outlives every use of this struct.
iree_string_view_t power_mode;
};

IREE_API_EXPORT void iree_hal_xrt_lite_device_options_initialize(
Expand Down
26 changes: 24 additions & 2 deletions runtime/src/iree-amd-aie/driver/xrt-lite/device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,26 @@ iree_hal_xrt_lite_device::iree_hal_xrt_lite_device(

iree_hal_resource_initialize(&iree_hal_xrt_lite_device_vtable, &resource);
this->host_allocator = host_allocator;
shim_device =
new shim_xdna::device(options->n_core_rows, options->n_core_cols);
this->power_mode = options->power_mode;
if (iree_string_view_equal(power_mode, IREE_SV("default"))) {
shim_device = new shim_xdna::device(
options->n_core_rows, options->n_core_cols, POWER_MODE_DEFAULT);
} else if (iree_string_view_equal(power_mode, IREE_SV("low"))) {
shim_device = new shim_xdna::device(options->n_core_rows,
options->n_core_cols, POWER_MODE_LOW);
} else if (iree_string_view_equal(power_mode, IREE_SV("medium"))) {
shim_device = new shim_xdna::device(
options->n_core_rows, options->n_core_cols, POWER_MODE_MEDIUM);
} else if (iree_string_view_equal(power_mode, IREE_SV("high"))) {
shim_device = new shim_xdna::device(options->n_core_rows,
options->n_core_cols, POWER_MODE_HIGH);
} else if (iree_string_view_equal(power_mode, IREE_SV("turbo"))) {
shim_device = new shim_xdna::device(options->n_core_rows,
options->n_core_cols, POWER_MODE_TURBO);
} else {
shim_device =
new shim_xdna::device(options->n_core_rows, options->n_core_cols);
}

iree_status_t status = iree_hal_xrt_lite_allocator_create(
host_allocator, shim_device, &device_allocator);
Expand Down Expand Up @@ -202,6 +220,10 @@ static void iree_hal_xrt_lite_device_destroy(iree_hal_device_t* base_device) {
base_device, iree_hal_xrt_lite_device_vtable, iree_hal_xrt_lite_device);

iree_hal_allocator_release(device->device_allocator);
if (!iree_string_view_is_empty(device->power_mode) &&
!iree_string_view_equal(device->power_mode, IREE_SV("default"))) {
device->shim_device->set_power_mode(POWER_MODE_DEFAULT);
}
delete device->shim_device;
iree_allocator_free(device->host_allocator, device);

Expand Down
1 change: 1 addition & 0 deletions runtime/src/iree-amd-aie/driver/xrt-lite/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ struct iree_hal_xrt_lite_device {
// should come last; see the definition of total_size below in
// iree_hal_xrt_lite_device_create
iree_string_view_t identifier;
iree_string_view_t power_mode;

iree_hal_xrt_lite_device(const iree_hal_xrt_lite_device_params* options,
iree_allocator_t host_allocator);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,15 @@ IREE_FLAG(int32_t, xrt_lite_n_core_rows, 0,
"Number of core rows to use on NPU.");
IREE_FLAG(int32_t, xrt_lite_n_core_cols, 0,
"Number of core cols to use on NPU.");
// Power mode to request for the NPU. Accepted values are "default", "low",
// "medium", "high" and "turbo"; the empty default means the option is not
// forwarded to the driver at all.
// see shim/linux/kmq/amdxdna_accel.h#L460 for options
IREE_FLAG(string, xrt_lite_power_mode, "", "Set the power mode of the NPU.");

// Option keys under which the command-line flag values are added to the
// string-pair builder and later matched when populating the device params.
static const iree_string_view_t key_xrt_lite_n_core_rows =
iree_string_view_literal("xrt_lite_n_core_rows");
static const iree_string_view_t key_xrt_lite_n_core_cols =
iree_string_view_literal("xrt_lite_n_core_cols");
static const iree_string_view_t key_xrt_lite_power_mode =
iree_string_view_literal("xrt_lite_power_mode");

static iree_status_t iree_hal_xrt_lite_driver_factory_enumerate(
void* self, iree_host_size_t* out_driver_info_count,
Expand Down Expand Up @@ -46,6 +50,13 @@ static iree_status_t iree_hal_xrt_lite_driver_parse_flags(
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_string_pair_builder_add_int32(builder, key_xrt_lite_n_core_cols,
FLAG_xrt_lite_n_core_cols));
iree_string_view_t power_mode = IREE_SV(FLAG_xrt_lite_power_mode);
if (!iree_string_view_is_empty(power_mode)) {
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_string_pair_builder_add(
builder,
iree_make_string_pair(key_xrt_lite_power_mode, power_mode)));
}

IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
Expand All @@ -68,14 +79,14 @@ static iree_status_t iree_hal_xrt_lite_driver_populate_options(
IREE_TRACE_ZONE_END(z0);
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Option 'key_xrt_lite_n_core_rows' expected to be int. Got: '%.*s'",
"Option 'xrt_lite_n_core_rows' expected to be int. Got: '%.*s'",
(int)value.size, value.data);
}
if (ivalue <= 0) {
IREE_TRACE_ZONE_END(z0);
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Option 'key_xrt_lite_n_core_rows' expected to be > 0. Got: '%.*s'",
"Option 'xrt_lite_n_core_rows' expected to be > 0. Got: '%.*s'",
(int)value.size, value.data);
}
device_params->n_core_rows = ivalue;
Expand All @@ -84,20 +95,34 @@ static iree_status_t iree_hal_xrt_lite_driver_populate_options(
IREE_TRACE_ZONE_END(z0);
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Option 'key_xrt_lite_n_core_cols' expected to be int. Got: '%.*s'",
"Option 'xrt_lite_n_core_cols' expected to be int. Got: '%.*s'",
(int)value.size, value.data);
}
if (ivalue <= 0) {
IREE_TRACE_ZONE_END(z0);
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Option 'key_xrt_lite_n_core_cols' expected to be > 0. Got: '%.*s'",
"Option 'xrt_lite_n_core_cols' expected to be > 0. Got: '%.*s'",
(int)value.size, value.data);
}
device_params->n_core_cols = ivalue;
} else if (iree_string_view_equal(key, key_xrt_lite_power_mode)) {
if (!(iree_string_view_equal(value, IREE_SV("default")) ||
iree_string_view_equal(value, IREE_SV("low")) ||
iree_string_view_equal(value, IREE_SV("medium")) ||
iree_string_view_equal(value, IREE_SV("high")) ||
iree_string_view_equal(value, IREE_SV("turbo")))) {
IREE_TRACE_ZONE_END(z0);
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Option 'xrt_lite_power_mode' expected to be default | low | "
"medium | high | turbo. Got: '%.*s'",
(int)value.size, value.data);
}
device_params->power_mode = value;
} else {
IREE_TRACE_ZONE_END(z0);
return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"Unrecognized options: %.*s", (int)key.size,
key.data);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,15 @@ device::device(uint32_t n_rows, uint32_t n_cols)
SHIM_DEBUG("Created KMQ device n_rows %d n_cols %d", n_rows, n_cols);
}

// Constructs a device exactly like device(n_rows, n_cols) and then applies the
// requested power mode through set_power_mode().
// NOTE(review): set_power_mode() reports failure through shim_err; if that
// throws, construction of the device fails - confirm callers expect this.
device::device(uint32_t n_rows, uint32_t n_cols,
amdxdna_power_mode_type power_mode)
: device(n_rows, n_cols) {
// Delegation above has already created the device; only the mode remains.
set_power_mode(power_mode);
SHIM_DEBUG("Created KMQ device n_rows %d n_cols %d with power_mode %s",
n_rows, n_cols,
stringify_amdxdna_power_mode_type(power_mode).c_str());
}

// Destructor: only emits a debug message; no explicit cleanup is done here.
device::~device() { SHIM_DEBUG("Destroying KMQ device"); }

// Returns a const reference to this device's m_pdev member.
const pdev &device::get_pdev() const { return m_pdev; }
Expand Down Expand Up @@ -249,6 +258,31 @@ void device::write_aie_reg(uint16_t col, uint16_t row, uint32_t reg_addr,
m_pdev.ioctl(DRM_IOCTL_AMDXDNA_SET_STATE, &arg);
}

// Queries the driver for the NPU's current power mode via the
// DRM_IOCTL_AMDXDNA_GET_INFO ioctl (param DRM_AMDXDNA_GET_POWER_MODE) and
// returns it as an amdxdna_power_mode_type.
amdxdna_power_mode_type device::get_power_mode() const {
  // Value-initialize the output buffer so that an indeterminate byte is never
  // returned should the driver leave (part of) the struct unwritten.
  amdxdna_drm_get_power_mode state = {};
  amdxdna_drm_get_info arg = {.param = DRM_AMDXDNA_GET_POWER_MODE,
                              .buffer_size = sizeof(state),
                              .buffer = reinterpret_cast<uintptr_t>(&state)};

  // NOTE(review): error handling is delegated to pdev::ioctl - confirm it
  // reports failures rather than returning silently.
  m_pdev.ioctl(DRM_IOCTL_AMDXDNA_GET_INFO, &arg);
  return static_cast<amdxdna_power_mode_type>(state.power_mode);
}

// Asks the driver to switch the NPU to `mode` via the
// DRM_IOCTL_AMDXDNA_SET_STATE ioctl (param DRM_AMDXDNA_SET_POWER_MODE).
// A failing ioctl is reported through shim_err together with errno.
void device::set_power_mode(amdxdna_power_mode_type mode) const {
  // Value-initialize before filling in the mode so that any field other than
  // power_mode is handed to the kernel as zero rather than as stack garbage.
  // NOTE(review): presumably the uapi struct carries padding - confirm
  // against amdxdna_accel.h.
  amdxdna_drm_set_power_mode state = {};
  state.power_mode = mode;
  amdxdna_drm_set_state arg = {.param = DRM_AMDXDNA_SET_POWER_MODE,
                               .buffer_size = sizeof(state),
                               .buffer = reinterpret_cast<uintptr_t>(&state)};
  // The raw ::ioctl is used (instead of m_pdev.ioctl) so that the failure
  // message can point at the likely cause.
  if (::ioctl(m_pdev.m_dev_fd, DRM_IOCTL_AMDXDNA_SET_STATE, &arg) == -1) {
    shim_err(
        errno,
        "DRM_AMDXDNA_SET_POWER_MODE failed; probably you need sudo privileges");
  }
  SHIM_DEBUG("set power_mode to %s",
             stringify_amdxdna_power_mode_type(mode).c_str());
}

std::string read_sysfs(const std::string &filename) {
std::ifstream file(filename);
std::string line;
Expand All @@ -274,4 +308,22 @@ std::filesystem::path find_npu_device() {
shim_err(errno, "No npu device found");
}

// Maps a power-mode enumerator to its upper-case display name ("DEFAULT",
// "LOW", "MEDIUM", "HIGH" or "TURBO"). Any other value is treated as a
// programming error and aborts through llvm::report_fatal_error.
std::string stringify_amdxdna_power_mode_type(
    amdxdna_power_mode_type power_mode) {
  if (power_mode == POWER_MODE_DEFAULT) {
    return std::string("DEFAULT");
  }
  if (power_mode == POWER_MODE_LOW) {
    return std::string("LOW");
  }
  if (power_mode == POWER_MODE_MEDIUM) {
    return std::string("MEDIUM");
  }
  if (power_mode == POWER_MODE_HIGH) {
    return std::string("HIGH");
  }
  if (power_mode == POWER_MODE_TURBO) {
    return std::string("TURBO");
  }
  // None of the known modes matched.
  llvm::report_fatal_error("unknown power mode");
}

} // namespace shim_xdna
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <filesystem>
#include <map>

#include "amdxdna_accel.h"
#include "fence.h"
#include "xrt_mem.h"

Expand All @@ -33,6 +34,7 @@ struct device {
uint32_t n_cols;

device(uint32_t n_rows, uint32_t n_cols);
device(uint32_t n_rows, uint32_t n_cols, amdxdna_power_mode_type power_mode);
~device();

std::unique_ptr<bo> import_bo(int ehdl) const;
Expand All @@ -58,12 +60,18 @@ struct device {
void write_aie_reg(uint16_t col, uint16_t row, uint32_t reg_addr,
uint32_t reg_val);

// TODO(max): hide amdxdna_accel enums so they don't leak
amdxdna_power_mode_type get_power_mode() const;
void set_power_mode(amdxdna_power_mode_type mode) const;

std::unique_ptr<fence_handle> create_fence(fence_handle::access_mode);
std::unique_ptr<fence_handle> import_fence(pid_t, int);
};

std::string read_sysfs(const std::string &filename);
std::filesystem::path find_npu_device();
std::string stringify_amdxdna_power_mode_type(
amdxdna_power_mode_type power_mode);

} // namespace shim_xdna

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <cassert>
#include <cstring>

#include "amdxdna_accel.h"
#include "bo.h"
#include "hwq.h"
#include "shim_debug.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

#include <map>

#include "amdxdna_accel.h"
#include "device.h"

namespace shim_xdna {
Expand Down Expand Up @@ -54,8 +53,7 @@ struct hw_ctx {
std::unique_ptr<hw_q> q, const std::vector<uint8_t> &pdi,
const std::string &cu_name, uint32_t n_rows, uint32_t n_cols);
hw_ctx(device &dev, const std::vector<uint8_t> &pdi,
const std::string &cu_name,
uint32_t n_rows, uint32_t n_cols,
const std::string &cu_name, uint32_t n_rows, uint32_t n_cols,
const std::map<std::string, uint32_t> &qos = {});
~hw_ctx();
// no copying
Expand Down
Loading