Add hipBLAS feature and fix build script #337

Draft: wants to merge 2 commits into main
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

3 changes: 2 additions & 1 deletion llama-cpp-2/Cargo.toml
@@ -16,9 +16,10 @@ tracing = { workspace = true }
 [features]
 cuda = ["llama-cpp-sys-2/cuda"]
 metal = ["llama-cpp-sys-2/metal"]
+hipblas = ["llama-cpp-sys-2/hipblas"]
 sampler = []
 
-[target.'cfg(all(target_os = "macos", any(target_arch = "aarch64", target_arch = "arm64")))'.dependencies]
+[target.'cfg(all(target_os = "macos", any(target_arch = "aarch64", target_arch = "arm64")))'.dependencies]
 llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features=["metal"], version = "0.1.48" }
 
 [lints]
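The new hipblas feature simply forwards to the matching feature on llama-cpp-sys-2, mirroring the existing cuda and metal wiring.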
1 change: 1 addition & 0 deletions llama-cpp-2/src/lib.rs
@@ -12,6 +12,7 @@
 //! # Feature Flags
 //!
 //! - `cuda` enables CUDA gpu support.
+//! - `hipblas` enables hipBLAS (ROCm) gpu support (experimental).
 //! - `sampler` adds the [`context::sample::sampler`] struct for a more rusty way of sampling.
 use std::ffi::NulError;
 use std::fmt::Debug;
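Like cuda, the new flag can be tested at compile time with cfg!. A minimal sketch of downstream usage (the helper is hypothetical, not part of this PR):

fn default_n_gpu_layers() -> u32 {
    // Offload every layer when a GPU backend feature was compiled in.
    if cfg!(any(feature = "cuda", feature = "hipblas")) {
        1000
    } else {
        0 // CPU-only build
    }
}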
3 changes: 2 additions & 1 deletion llama-cpp-sys-2/Cargo.toml
@@ -51,8 +51,9 @@ include = [
 bindgen = { workspace = true }
 cc = { workspace = true, features = ["parallel"] }
 once_cell = "1.19.0"
+glob = "0.3.1"
 
 [features]
 cuda = []
 metal = []
-
+hipblas = []
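glob becomes a build-dependency because build.rs (below) now globs the per-kernel .cu sources that upstream llama.cpp split out into the ggml-cuda/ directory.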
53 changes: 31 additions & 22 deletions llama-cpp-sys-2/build.rs
@@ -1,11 +1,13 @@
-use std::env;
+use std::env::{self, VarError};
 use std::fs::{read_dir, File};
 use std::io::Write;
 use std::path::{Path, PathBuf};
 use std::process::Command;
+use std::str::FromStr;
 
 use cc::Build;
 use once_cell::sync::Lazy;
+use glob::glob;
 
 // This build file is based on:
 // https://github.com/mdrokz/rust-llama.cpp/blob/master/build.rs
@@ -365,23 +367,16 @@ fn compile_blis(cx: &mut Build) {
 }
 
 fn compile_hipblas(cx: &mut Build, cxx: &mut Build, mut hip: Build) -> &'static str {
-    const DEFAULT_ROCM_PATH_STR: &str = "/opt/rocm/";
+    let rocm_path_str = env::var("ROCM_PATH").or(Ok::<String, VarError>(String::from_str("/opt/rocm/").unwrap())).unwrap();
 
-    let rocm_path_str = env::var("ROCM_PATH")
-        .map_err(|_| DEFAULT_ROCM_PATH_STR.to_string())
-        .unwrap();
-    println!("Compiling HIPBLAS GGML. Using ROCm from {rocm_path_str}");
+    println!("Compiling hipBLAS GGML. Using ROCm from {rocm_path_str}");
 
     let rocm_path = PathBuf::from(rocm_path_str);
     let rocm_include = rocm_path.join("include");
     let rocm_lib = rocm_path.join("lib");
     let rocm_hip_bin = rocm_path.join("bin/hipcc");
 
-    let cuda_lib = "ggml-cuda";
-    let cuda_file = cuda_lib.to_string() + ".cu";
-    let cuda_header = cuda_lib.to_string() + ".h";
-
-    let defines = ["GGML_USE_HIPBLAS", "GGML_USE_CUBLAS"];
+    let defines = ["GGML_USE_HIPBLAS", "GGML_USE_CUDA"];
     for def in defines {
         cx.define(def, None);
         cxx.define(def, None);
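Worth noting: the replacement line actually falls back to /opt/rocm/ when ROCM_PATH is unset, whereas the deleted map_err(...).unwrap() chain panicked in that case instead of using the default. A more conventional spelling of the same fallback, shown for comparison only (it needs neither the VarError nor the FromStr import):

    let rocm_path_str = env::var("ROCM_PATH").unwrap_or_else(|_| "/opt/rocm/".to_string());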
@@ -390,24 +385,39 @@ fn compile_hipblas(cx: &mut Build, cxx: &mut Build, mut hip: Build) -> &'static
     cx.include(&rocm_include);
     cxx.include(&rocm_include);
 
+    let ggml_cuda = glob(LLAMA_PATH.join("ggml-cuda").join("*.cu").to_str().unwrap())
+        .unwrap().filter_map(Result::ok).collect::<Vec<_>>();
+    let ggml_template_fattn = glob(LLAMA_PATH.join("ggml-cuda").join("template-instances").join("fattn-vec*.cu").to_str().unwrap())
+        .unwrap().filter_map(Result::ok).collect::<Vec<_>>();
+    let ggml_template_wmma = glob(LLAMA_PATH.join("ggml-cuda").join("template-instances").join("fattn-wmma*.cu").to_str().unwrap())
+        .unwrap().filter_map(Result::ok).collect::<Vec<_>>();
+    let ggml_template_mmq = glob(LLAMA_PATH.join("ggml-cuda").join("template-instances").join("mmq*.cu").to_str().unwrap())
+        .unwrap().filter_map(Result::ok).collect::<Vec<_>>();
+
     hip.compiler(rocm_hip_bin)
         .std("c++11")
-        .file(LLAMA_PATH.join(cuda_file))
-        .include(LLAMA_PATH.join(cuda_header))
-        .define("LLAMA_CUDA_DMMV_X", Some("32"))
-        .define("LLAMA_CUDA_MMV_Y", Some("1"))
-        .define("LLAMA_CUDA_KQUANTS_ITER", Some("2"))
+        .file(LLAMA_PATH.join("ggml-cuda.cu"))
+        .files(ggml_cuda)
+        .files(ggml_template_fattn)
+        .files(ggml_template_wmma)
+        .files(ggml_template_mmq)
+        .include(LLAMA_PATH.join(""))
+        .include(LLAMA_PATH.join("ggml-cuda"))
         .define("GGML_USE_HIPBLAS", None)
-        .compile(cuda_lib);
+        .define("GGML_USE_CUDA", None)
+        .compile("ggml-cuda");
 
-    println!(
-        "cargo:rustc-link-search=native={}",
-        rocm_lib.to_string_lossy()
-    );
+    println!("cargo:rustc-link-search=native={}", rocm_lib.to_string_lossy());
 
     let rocm_libs = ["hipblas", "rocblas", "amdhip64"];
     for lib in rocm_libs {
         println!("cargo:rustc-link-lib={lib}");
     }
 
-    cuda_lib
+    "ggml-cuda"
 }
 
 fn compile_cuda(cx: &mut Build, cxx: &mut Build, featless_cxx: Build) {
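All four glob calls above follow one pattern: expand a wildcard beneath LLAMA_PATH and keep only readable matches. A condensed sketch of that pattern against glob 0.3's API (the helper name is hypothetical):

use glob::glob;
use std::path::{Path, PathBuf};

// glob() returns Result<Paths, PatternError>; iterating Paths yields
// Result<PathBuf, GlobError>, so filter_map(Result::ok) drops unreadable entries.
fn collect_sources(dir: &Path, pattern: &str) -> Vec<PathBuf> {
    glob(dir.join(pattern).to_str().unwrap())
        .expect("valid glob pattern")
        .filter_map(Result::ok)
        .collect()
}

collect_sources(&LLAMA_PATH.join("ggml-cuda"), "*.cu") would then reproduce the first of the four collections.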
@@ -507,8 +517,7 @@ fn compile_metal(cx: &mut Build, cxx: &mut Build) {
     let common = LLAMA_PATH.join("ggml-common.h");
 
     let input_file = File::open(ggml_metal_shader_path).expect("Failed to open input file");
-    let mut output_file =
-        File::create(&ggml_metal_shader_out_path).expect("Failed to create output file");
+    let output_file = File::create(&ggml_metal_shader_out_path).expect("Failed to create output file");
 
     let output = Command::new("sed")
         .arg("-e")
@@ -656,7 +665,7 @@ fn main() {
     push_warn_flags(&mut cx, &mut cxx);
     push_feature_flags(&mut cx, &mut cxx);
 
-    let feat_lib = if cfg!(feature = "vulkan") {
+    let _feat_lib = if cfg!(feature = "vulkan") {
         Some(compile_vulkan(&mut cx, &mut cxx))
     } else if cfg!(feature = "cuda") {
         Some(compile_cuda(&mut cx, &mut cxx, featless_cxx))
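The leading underscore marks the binding as intentionally unused, so rustc stops warning about it while the compile_* calls still run for their side effects; presumably nothing reads the returned library name anymore.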
8 changes: 4 additions & 4 deletions simple/src/main.rs
@@ -44,7 +44,7 @@ struct Args {
     #[arg(short = 'o', value_parser = parse_key_val)]
     key_value_overrides: Vec<(String, ParamOverrideValue)>,
     /// Disable offloading layers to the gpu
-    #[cfg(feature = "cuda")]
+    #[cfg(any(feature = "cuda", feature = "hipblas"))]
     #[clap(long)]
     disable_gpu: bool,
     #[arg(short = 's', long, help = "RNG seed (default: 1234)")]
@@ -124,7 +124,7 @@ fn main() -> Result<()> {
         model,
         prompt,
         file,
-        #[cfg(feature = "cuda")]
+        #[cfg(any(feature = "cuda", feature = "hipblas"))]
         disable_gpu,
         key_value_overrides,
         seed,
@@ -138,13 +138,13 @@ fn main() -> Result<()> {
 
     // offload all layers to the gpu
     let model_params = {
-        #[cfg(feature = "cuda")]
+        #[cfg(any(feature = "cuda", feature = "hipblas"))]
         if !disable_gpu {
            LlamaModelParams::default().with_n_gpu_layers(1000)
        } else {
            LlamaModelParams::default()
        }
-        #[cfg(not(feature = "cuda"))]
+        #[cfg(not(any(feature = "cuda", feature = "hipblas")))]
        LlamaModelParams::default()
    };
 