From cfa76bd567516d897aae85d868a3b26fc8295134 Mon Sep 17 00:00:00 2001 From: Vitali Lovich Date: Fri, 21 Feb 2025 18:11:36 -0800 Subject: [PATCH] Don't imply dynamic llama.cpp just because CUDA is on Link against CUDA statically as well to maintain consistency with GGML_STATIC although technically that's our discretion. --- Cargo.lock | 10 ++++++++++ llama-cpp-2/Cargo.toml | 1 + llama-cpp-sys-2/Cargo.toml | 5 ++++- llama-cpp-sys-2/build.rs | 31 ++++++++++++++++++++++++++++++- 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 48f1a2a1..85918c3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -336,6 +336,15 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +[[package]] +name = "find_cuda_helper" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9f9e65c593dd01ac77daad909ea4ad17f0d6d1776193fc8ea766356177abdad" +dependencies = [ + "glob", +] + [[package]] name = "flate2" version = "1.0.30" @@ -670,6 +679,7 @@ dependencies = [ "bindgen", "cc", "cmake", + "find_cuda_helper", "glob", "walkdir", ] diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml index e836af25..97028540 100644 --- a/llama-cpp-2/Cargo.toml +++ b/llama-cpp-2/Cargo.toml @@ -21,6 +21,7 @@ encoding_rs = { workspace = true } [features] default = ["openmp", "android-shared-stdcxx"] cuda = ["llama-cpp-sys-2/cuda"] +cuda-no-vmm = ["cuda", "llama-cpp-sys-2/cuda-no-vmm"] metal = ["llama-cpp-sys-2/metal"] dynamic-link = ["llama-cpp-sys-2/dynamic-link"] vulkan = ["llama-cpp-sys-2/vulkan"] diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml index 5cf25353..6854794d 100644 --- a/llama-cpp-sys-2/Cargo.toml +++ b/llama-cpp-sys-2/Cargo.toml @@ -74,15 +74,18 @@ include = [ bindgen = { workspace = true } cc = { workspace = true, features = ["parallel"] } cmake = "0.1" +find_cuda_helper = "0.2.0" glob = "0.3.2" walkdir = "2" [features] cuda = [] +# Disables the need to dynamically link against libcuda.so / cuda.dll +cuda-no-vmm = ["cuda"] metal = [] dynamic-link = [] vulkan = [] native = [] openmp = [] # Only has an impact on Android. -shared-stdcxx = [] +shared-stdcxx = [] \ No newline at end of file diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs index ec4ac7ce..206baddf 100644 --- a/llama-cpp-sys-2/build.rs +++ b/llama-cpp-sys-2/build.rs @@ -179,7 +179,7 @@ fn main() { let target_dir = get_cargo_target_dir().unwrap(); let manifest_dir = env::var("CARGO_MANIFEST_DIR").expect("Failed to get CARGO_MANIFEST_DIR"); let llama_src = Path::new(&manifest_dir).join("llama.cpp"); - let build_shared_libs = cfg!(feature = "cuda") || cfg!(feature = "dynamic-link"); + let build_shared_libs = cfg!(feature = "dynamic-link"); let build_shared_libs = std::env::var("LLAMA_BUILD_SHARED_LIBS") .map(|v| v == "1") @@ -355,6 +355,10 @@ fn main() { if cfg!(feature = "cuda") { config.define("GGML_CUDA", "ON"); + + if cfg!(feature = "cuda-no-vmm") { + config.define("GGML_CUDA_NO_VMM", "ON"); + } } // Android doesn't have OpenMP support AFAICT and openmp is a default feature. Do this here @@ -394,6 +398,31 @@ fn main() { ); println!("cargo:rustc-link-search={}", build_dir.display()); + if cfg!(feature = "cuda") && !build_shared_libs { + println!("cargo:rerun-if-env-changed=CUDA_PATH"); + + for lib_dir in find_cuda_helper::find_cuda_lib_dirs() { + println!("cargo:rustc-link-search=native={}", lib_dir.display()); + } + + // Logic from ggml-cuda/CMakeLists.txt + println!("cargo:rustc-link-lib=static=cudart_static"); + if matches!(target_os, TargetOs::Windows(_)) { + println!("cargo:rustc-link-lib=static=cublas"); + println!("cargo:rustc-link-lib=static=cublasLt"); + } else { + println!("cargo:rustc-link-lib=static=cublas_static"); + println!("cargo:rustc-link-lib=static=cublasLt_static"); + } + + // Need to link against libcuda.so unless GGML_CUDA_NO_VMM is defined. + if !cfg!(feature = "cuda-no-vmm") { + println!("cargo:rustc-link-lib=cuda"); + } + + println!("cargo:rustc-link-lib=static=culibos"); + } + // Link libraries let llama_libs_kind = if build_shared_libs { "dylib" } else { "static" }; let llama_libs = extract_lib_names(&out_dir, build_shared_libs);