From 5a4dbd4429a1969d8940b67ba8b8a92dcd0366be Mon Sep 17 00:00:00 2001
From: Vitali Lovich
Date: Thu, 13 Feb 2025 12:41:48 -0800
Subject: [PATCH] Fix CPU inference performance in MSVC Rust debug builds

Workaround for https://github.com/rust-lang/cmake-rs/issues/240
---
 llama-cpp-sys-2/build.rs | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs
index 1f393e06..b832fbbc 100644
--- a/llama-cpp-sys-2/build.rs
+++ b/llama-cpp-sys-2/build.rs
@@ -278,6 +278,22 @@ fn main() {
         config.define("GGML_BLAS", "OFF");
     }
 
+    if (cfg!(debug_assertions)
+        || std::env::var("PROFILE").as_ref().map(String::as_str) == Ok("debug"))
+        && matches!(target_os, TargetOs::Windows(WindowsVariant::Msvc))
+        && profile == "Release"
+    {
+        // Debug Rust builds under MSVC turn off optimization even though we're ideally building the release profile of llama.cpp.
+        // This looks like an upstream bug:
+        // https://github.com/rust-lang/cmake-rs/issues/240
+        // For now, explicitly reinject the optimization flags that a CMake Release build is expected to have enabled in this scenario.
+        // This fixes CPU inference performance when llama.cpp is part of a Rust debug build.
+        for flag in &["/O2", "/DNDEBUG", "/Ob2"] {
+            config.cflag(flag);
+            config.cxxflag(flag);
+        }
+    }
+
     config.static_crt(static_crt);
 
     if matches!(target_os, TargetOs::Android) {
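
For context, a minimal, self-contained build.rs sketch of the same workaround follows. It assumes a hypothetical vendored llama.cpp checkout next to the manifest; the cmake crate calls it uses (cmake::Config::new, profile, define, cflag, cxxflag, build) are real cmake-rs APIs, while the CARGO_CFG_TARGET_ENV probe and the trailing link-search line are illustrative additions, not code from this patch. The sketch keys off environment variables because Cargo exposes the target environment to build scripts via CARGO_CFG_TARGET_ENV and the active profile via PROFILE, whereas cfg! inside a build script reflects how the build script itself was compiled.

    // Hypothetical sketch, not part of the patch: drive a CMake Release
    // build of llama.cpp from Cargo, and reinject the MSVC optimization
    // flags when the surrounding Rust build is a debug build.
    fn main() {
        // Assumed source layout: a llama.cpp checkout beside Cargo.toml.
        let mut config = cmake::Config::new("llama.cpp");
        config.profile("Release");
        config.define("GGML_BLAS", "OFF");

        // Build scripts learn about the *target* from CARGO_CFG_* env vars
        // and about the active Cargo profile from PROFILE.
        let msvc = std::env::var("CARGO_CFG_TARGET_ENV").as_deref() == Ok("msvc");
        let debug = std::env::var("PROFILE").as_deref() == Ok("debug");

        if msvc && debug {
            // Workaround for rust-lang/cmake-rs#240: cmake-rs drops the
            // Release optimization flags when Cargo's profile is "debug",
            // so re-add the flags an MSVC Release build normally gets.
            for flag in ["/O2", "/DNDEBUG", "/Ob2"] {
                config.cflag(flag);
                config.cxxflag(flag);
            }
        }

        let dst = config.build();
        println!("cargo:rustc-link-search=native={}", dst.join("lib").display());
    }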