Don't build llama.cpp as a shared library just because CUDA is on

Also link against CUDA statically, to stay consistent with GGML_STATIC, although technically that choice is at our discretion.
utilityai · Feb 22, 2025 · a1c640b · a1c640b
1 parent 8b11c5c
commit a1c640b
Show file tree

Hide file tree

Showing 5 changed files with 46 additions and 3 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml
@@ -21,6 +21,7 @@ encoding_rs = { workspace = true }
 [features]
 default = ["openmp", "android-shared-stdcxx"]
 cuda = ["llama-cpp-sys-2/cuda"]
+cuda-no-vmm = ["cuda", "llama-cpp-sys-2/cuda-no-vmm"]
 metal = ["llama-cpp-sys-2/metal"]
 dynamic-link = ["llama-cpp-sys-2/dynamic-link"]
 vulkan = ["llama-cpp-sys-2/vulkan"]

diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml
@@ -74,15 +74,18 @@ include = [
 bindgen = { workspace = true }
 cc = { workspace = true, features = ["parallel"] }
 cmake = "0.1"
+find_cuda_helper = "0.2.0"
 glob = "0.3.2"
 walkdir = "2"
 
 [features]
 cuda = []
+# Disables the need to dynamically link against libcuda.so / cuda.dll
+cuda-no-vmm = ["cuda"]
 metal = []
 dynamic-link = []
 vulkan = []
 native = []
 openmp = []
 # Only has an impact on Android.
-shared-stdcxx = []
+shared-stdcxx = []
diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs
@@ -179,7 +179,7 @@ fn main() {
     let target_dir = get_cargo_target_dir().unwrap();
     let manifest_dir = env::var("CARGO_MANIFEST_DIR").expect("Failed to get CARGO_MANIFEST_DIR");
     let llama_src = Path::new(&manifest_dir).join("llama.cpp");
-    let build_shared_libs = cfg!(feature = "cuda") || cfg!(feature = "dynamic-link");
+    let build_shared_libs = cfg!(feature = "dynamic-link");
 
     let build_shared_libs = std::env::var("LLAMA_BUILD_SHARED_LIBS")
         .map(|v| v == "1")
@@ -355,6 +355,10 @@ fn main() {
 
     if cfg!(feature = "cuda") {
         config.define("GGML_CUDA", "ON");
+
+        if cfg!(feature = "cuda-no-vmm") {
+            config.define("GGML_CUDA_NO_VMM", "ON");
+        }
     }
 
     // Android doesn't have OpenMP support AFAICT and openmp is a default feature. Do this here
@@ -394,6 +398,31 @@ fn main() {
     );
     println!("cargo:rustc-link-search={}", build_dir.display());
 
+    if cfg!(feature = "cuda") && !build_shared_libs {
+        println!("cargo:rerun-if-env-changed=CUDA_PATH");
+
+        for lib_dir in find_cuda_helper::find_cuda_lib_dirs() {
+            println!("cargo:rustc-link-search=native={}", lib_dir.display());
+        }
+
+        // Logic from ggml-cuda/CMakeLists.txt
+        println!("cargo:rustc-link-lib=static=cudart_static");
+        if matches!(target_os, TargetOs::Windows(_)) {
+            println!("cargo:rustc-link-lib=static=cublas");
+            println!("cargo:rustc-link-lib=static=cublasLt");
+        } else {
+            println!("cargo:rustc-link-lib=static=cublas_static");
+            println!("cargo:rustc-link-lib=static=cublasLt_static");
+        }
+
+        // Need to link against libcuda.so unless GGML_CUDA_NO_VMM is defined.
+        if !cfg!(feature = "cuda-no-vmm") {
+            println!("cargo:rustc-link-lib=cuda");
+        }
+
+        println!("cargo:rustc-link-lib=static=culibos");
+    }
+
     // Link libraries
     let llama_libs_kind = if build_shared_libs { "dylib" } else { "static" };
     let llama_libs = extract_lib_names(&out_dir, build_shared_libs);

diff --git a/llama-cpp-sys-2/llama.cpp b/llama-cpp-sys-2/llama.cpp