failing to link due to "mach-o"
MarcusDunn committed Feb 5, 2024
1 parent 242e5a7 commit 8a73403
Showing 4 changed files with 125 additions and 10 deletions.
4 changes: 4 additions & 0 deletions llama-cpp-2/Cargo.toml
@@ -26,6 +26,10 @@ anyhow = "1.0.79"
name = "grammar_bias"
harness = false

[[bench]]
name = "generate"
harness = false

[features]
cublas = ["llama-cpp-sys-2/cublas"]

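As an aside, harness = false turns off libtest's built-in bench harness for this target, so the bench file must supply its own main. A minimal sketch of that shape, which the new benches/generate.rs below follows (the body is elided here):

use criterion::{criterion_group, criterion_main, Criterion};

fn generate(c: &mut Criterion) {
    // benchmark body elided; see benches/generate.rs below
}

criterion_group!(benches, generate);
criterion_main!(benches);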
61 changes: 61 additions & 0 deletions llama-cpp-2/benches/generate.rs
@@ -0,0 +1,61 @@
use anyhow::Context;
use criterion::{Criterion, criterion_group, criterion_main};
use pprof::criterion::{Output, PProfProfiler};
use llama_cpp_2::context::params::LlamaContextParams;
use llama_cpp_2::llama_backend::LlamaBackend;
use llama_cpp_2::llama_batch::LlamaBatch;
use llama_cpp_2::model::{AddBos, LlamaModel};
use llama_cpp_2::model::params::LlamaModelParams;
use llama_cpp_2::token::data_array::LlamaTokenDataArray;

fn generate(c: &mut Criterion) {
let api = hf_hub::api::sync::ApiBuilder::new()
.with_progress(true)
.build()
.unwrap();
let file = api
.model("TheBloke/Llama-2-7B-Chat-GGUF".to_string())
.get("llama-2-7b-chat.Q4_K_M.gguf")
.unwrap();
let backend = LlamaBackend::init().unwrap();
let model_params = LlamaModelParams::default();
let model = LlamaModel::load_from_file(&backend, &file, &model_params).unwrap();
let mut ctx = model
.new_context(&backend, LlamaContextParams::default())
.unwrap();

c.bench_function("generate 50 tokens", |b| {
b.iter(|| {
let tokens_list = model.str_to_token("Hello, my name is", AddBos::Always).unwrap();
let mut n_ctx = tokens_list.len() as i32;
let mut batch = LlamaBatch::new(512, 1);
let last_index: i32 = (tokens_list.len() - 1) as i32;
for (i, token) in (0_i32..).zip(tokens_list.into_iter()) {
let is_last = i == last_index;
batch.add(token, i, &[0], is_last).unwrap();
}
ctx.decode(&mut batch).unwrap();

for _ in 0..50 {
let candidates = ctx.candidates_ith(batch.n_tokens() - 1);
let candidates_p = LlamaTokenDataArray::from_iter(candidates, false);
let new_token_id = ctx.sample_token_greedy(candidates_p);
if new_token_id == model.token_eos() {
break;
}
batch.clear();
batch.add(new_token_id, n_ctx, &[0], true).unwrap();
n_ctx += 1;
ctx.decode(&mut batch).unwrap();
}
ctx.clear_kv_cache_seq(0, None, None)
});
});
}

criterion_group!(
name = benches;
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
targets = generate
);
criterion_main!(benches);
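For reference, this target runs like any other Criterion bench (cargo bench --bench generate). Because the group is wired up with PProfProfiler, a flamegraph should only be produced when Criterion is switched into profile mode, which, assuming the usual pprof criterion integration, is done with an extra flag:

cargo bench --bench generate -- --profile-time 60

Note that the first run pulls llama-2-7b-chat.Q4_K_M.gguf through hf_hub, so it needs network access and several gigabytes of disk, and the download will dominate the first invocation's wall time.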
2 changes: 1 addition & 1 deletion llama-cpp-2/benches/grammar_bias.rs
@@ -32,7 +32,7 @@ fn criterion_benchmark(c: &mut Criterion) {
let model_params = LlamaModelParams::default();
let model = LlamaModel::load_from_file(&backend, &file, &model_params).unwrap();
let mut ctx = model
.new_context(&backend, &LlamaContextParams::default())
.new_context(&backend, LlamaContextParams::default())
.unwrap();
let grammar = LlamaGrammar::from_str(include_str!("../src/grammar/json.gbnf")).unwrap();

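The one-line change above presumably tracks a change to new_context, which now appears to take LlamaContextParams by value rather than by reference (the new benches/generate.rs calls it the same way). A sketch of the updated call pattern under that assumption:

// before: params passed by reference
// let mut ctx = model.new_context(&backend, &LlamaContextParams::default()).unwrap();

// after: params built and moved into the call
let ctx_params = LlamaContextParams::default();
let mut ctx = model.new_context(&backend, ctx_params).unwrap();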
68 changes: 59 additions & 9 deletions llama-cpp-sys-2/build.rs
@@ -13,6 +13,7 @@ fn main() {

let mut ggml = cc::Build::new();
let mut ggml_cuda = if cublas_enabled { Some(cc::Build::new()) } else { None };
let mut ggml_metal = if cfg!(target_os = "macos") { Some(cc::Build::new()) } else { None };
let mut llama_cpp = cc::Build::new();

ggml.cpp(false);
@@ -60,20 +61,22 @@ fn main() {
llama_cpp.define("GGML_USE_ACCELERATE", None);
llama_cpp.define("ACCELERATE_NEW_LAPACK", None);
llama_cpp.define("ACCELERATE_LAPACK_ILP64", None);
println!("cargo:rustc-link-lib=framework=Accelerate");
println!("cargo:rustc-link-arg=framework=Accelerate");

// MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
// https://github.com/ggerganov/llama.cpp/blob/3c0d25c4756742ebf15ad44700fabc0700c638bd/Makefile#L509-L511
println!("cargo:rustc-link-lib=framework Foundation");
println!("cargo:rustc-link-lib=framework Metal");
println!("cargo:rustc-link-lib=framework MetalKit");

println!("cargo:rustc-link-arg=framework=Foundation");
println!("cargo:rustc-link-arg=framework=Metal");
println!("cargo:rustc-link-arg=framework=MetalKit");
}

// https://github.com/ggerganov/llama.cpp/blob/3c0d25c4756742ebf15ad44700fabc0700c638bd/Makefile#L517-L520
ggml
.file("llama.cpp/ggml-metal.m")
.file("llama.cpp/ggml-metal.h");
if let Some(ggml_metal) = &mut ggml_metal {
metal_hack(ggml_metal);
ggml_metal
.file("llama.cpp/ggml-metal")
.include("llama.cpp");
}

if cfg!(target_os = "dragonfly") {
llama_cpp.define("__BSD_VISIBLE", None);
}
@@ -83,6 +86,12 @@ fn main() {
ggml_cuda.compile("ggml-cuda");
}


if let Some(ggml_metal) = ggml_metal {
println!("compiling ggml-metal");
ggml_metal.compile("ggml-metal")
}

if cfg!(target_os = "linux") {
ggml.define("_GNU_SOURCE", None);
}
@@ -97,6 +106,7 @@ fn main() {

llama_cpp
.define("_XOPEN_SOURCE", Some("600"))
.include("llama.cpp")
.std("c++17")
.file("llama.cpp/llama.cpp");

@@ -124,3 +134,43 @@ fn main() {
.write_to_file(out_path.join("bindings.rs"))
.expect("failed to write bindings to file");
}


// courtesy of https://github.com/rustformers/llm
fn metal_hack(build: &mut cc::Build) {
const GGML_METAL_METAL_PATH: &str = "llama.cpp/ggml-metal.metal";
const GGML_METAL_PATH: &str = "llama.cpp/ggml-metal.m";

let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR is not defined"));

let ggml_metal_path = {
let ggml_metal_metal = std::fs::read_to_string(GGML_METAL_METAL_PATH)
.expect("Could not read ggml-metal.metal")
.replace('\\', "\\\\")
.replace('\n', "\\n")
.replace('\r', "\\r")
.replace('\"', "\\\"");

let ggml_metal =
std::fs::read_to_string(GGML_METAL_PATH).expect("Could not read ggml-metal.m");

let needle = r#"NSString * src = [NSString stringWithContentsOfFile:sourcePath encoding:NSUTF8StringEncoding error:&error];"#;
if !ggml_metal.contains(needle) {
panic!("ggml-metal.m does not contain the needle to be replaced; the patching logic needs to be reinvestigated. Contact a `llama-cpp-sys-2` developer!");
}

// Replace the runtime read of the file with a compile-time string
let ggml_metal = ggml_metal.replace(
needle,
&format!(r#"NSString * src = @"{ggml_metal_metal}";"#),
);

let patched_ggml_metal_path = out_dir.join("ggml-metal.m");
std::fs::write(&patched_ggml_metal_path, ggml_metal)
.expect("Could not write temporary patched ggml-metal.m");

patched_ggml_metal_path
};

build.file(ggml_metal_path);
}
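To make the intent of metal_hack concrete: it inlines the contents of ggml-metal.metal into ggml-metal.m as an Objective-C string literal, so the compiled library no longer has to locate the .metal file on disk at runtime. A small self-contained sketch of just the escaping step (the shader text is made up for illustration, not taken from llama.cpp):

fn escape_for_objc_literal(src: &str) -> String {
    // Escape backslashes first so the escapes added below are not doubled up.
    src.replace('\\', "\\\\")
        .replace('\n', "\\n")
        .replace('\r', "\\r")
        .replace('\"', "\\\"")
}

fn main() {
    let shader = "kernel void scale(device float *x) {\n    // \"example\"\n}";
    let line = format!("NSString * src = @\"{}\";", escape_for_objc_literal(shader));
    // `line` is the kind of text spliced in place of the stringWithContentsOfFile call.
    println!("{line}");
}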
