Merge pull request #637 from vlovich/logging-intercept
Redirect llama.cpp logs into tracing
MarcusDunn authored Feb 5, 2025
2 parents 8fc28b3 + 373f8c6 commit 773d2c0
Showing 9 changed files with 508 additions and 31 deletions.
133 changes: 133 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 2 additions & 0 deletions Cargo.toml
@@ -11,6 +11,7 @@ members = [
# core library deps
thiserror = "1"
tracing = "0.1"
tracing-core = "0.1"

# examples and benchmarks
hf-hub = { version = "0.3.2" }
@@ -21,6 +22,7 @@ cc = "1.2.11"
anyhow = "1.0.95"
clap = "4.5.27"
encoding_rs = "0.8.35"
tracing-subscriber = { version = "0.3", features = ["json"] }

[workspace.lints.rust]
missing_docs = { level = "warn" }
1 change: 1 addition & 0 deletions examples/simple/Cargo.toml
@@ -11,6 +11,7 @@ hf-hub = { workspace = true }
clap = { workspace = true , features = ["derive"] }
anyhow = { workspace = true }
encoding_rs = { workspace = true }
tracing-subscriber = { workspace = true }

[features]
cuda = ["llama-cpp-2/cuda"]
14 changes: 13 additions & 1 deletion examples/simple/src/main.rs
@@ -10,7 +10,7 @@ use anyhow::{anyhow, bail, Context, Result};
use clap::Parser;
use hf_hub::api::sync::ApiBuilder;
use llama_cpp_2::context::params::LlamaContextParams;
use llama_cpp_2::ggml_time_us;
use llama_cpp_2::{ggml_time_us, send_logs_to_tracing, LogOptions};
use llama_cpp_2::llama_backend::LlamaBackend;
use llama_cpp_2::llama_batch::LlamaBatch;
use llama_cpp_2::model::params::kv_overrides::ParamOverrideValue;
@@ -67,6 +67,12 @@ struct Args {
help = "size of the prompt context (default: loaded from themodel)"
)]
ctx_size: Option<NonZeroU32>,
#[arg(
short = 'v',
long,
help = "enable verbose llama.cpp logs",
)]
verbose: bool,
}

/// Parse a single key-value pair
@@ -132,8 +138,14 @@ fn main() -> Result<()> {
        threads,
        threads_batch,
        ctx_size,
        verbose,
    } = Args::parse();

    if verbose {
        tracing_subscriber::fmt().init();
    }
    send_logs_to_tracing(LogOptions::default().with_logs_enabled(verbose));

    // init LLM
    let backend = LlamaBackend::init()?;

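The example above wires the verbose flag to the default plain-text fmt subscriber. Since the workspace dependency enables tracing-subscriber's "json" feature, the same wiring could emit structured output instead; a small variant of the snippet above, illustrative only and not part of this commit:

if verbose {
    // Same flag as the diff above, but render each tracing event (including the redirected
    // llama.cpp / ggml logs) as one JSON object per line via tracing-subscriber's "json" feature.
    tracing_subscriber::fmt().json().init();
}
send_logs_to_tracing(LogOptions::default().with_logs_enabled(verbose));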
1 change: 1 addition & 0 deletions llama-cpp-2/Cargo.toml
@@ -13,6 +13,7 @@ enumflags2 = "0.7.11"
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.69" }
thiserror = { workspace = true }
tracing = { workspace = true }
tracing-core = { workspace = true }

[dev-dependencies]
encoding_rs = { workspace = true }
74 changes: 74 additions & 0 deletions llama-cpp-2/src/lib.rs
@@ -25,6 +25,7 @@ use std::string::FromUtf8Error;
pub mod context;
pub mod llama_backend;
pub mod llama_batch;
mod log;
pub mod model;
pub mod sampling;
pub mod timing;
@@ -323,3 +324,76 @@ pub fn ggml_time_us() -> i64 {
pub fn llama_supports_mlock() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

/// Options to configure how llama.cpp logs are intercepted.
#[derive(Default, Debug, Clone)]
pub struct LogOptions {
    disabled: bool,
}

impl LogOptions {
    /// If enabled, logs are sent to tracing. If disabled, all logs are suppressed. The default
    /// is for logs to be sent to tracing.
    pub fn with_logs_enabled(mut self, enabled: bool) -> Self {
        self.disabled = !enabled;
        self
    }
}

extern "C" fn logs_to_trace(
level: llama_cpp_sys_2::ggml_log_level,
text: *const ::std::os::raw::c_char,
data: *mut ::std::os::raw::c_void,
) {
// In the "fast-path" (i.e. the vast majority of logs) we want to avoid needing to take the log state
// lock at all. Similarly, we try to avoid any heap allocations within this function. This is accomplished
// by being a dummy pass-through to tracing in the normal case of DEBUG/INFO/WARN/ERROR logs that are
// newline terminated and limiting the slow-path of locks and/or heap allocations for other cases.
use std::borrow::Borrow;

let log_state = unsafe { &*(data as *const log::State) };

let text = unsafe { std::ffi::CStr::from_ptr(text) };
let text = text.to_string_lossy();
let text: &str = text.borrow();

if log_state.options.disabled {
return;
}

// As best I can tell llama.cpp / ggml require all log format strings at call sites to have the '\n'.
// If it's missing, it means that you expect more logs via CONT (or there's a typo in the codebase). To
// distinguish typo from intentional support for CONT, we have to buffer until the next message comes in
// to know how to flush it.

if level == llama_cpp_sys_2::GGML_LOG_LEVEL_CONT {
log_state.cont_buffered_log(text);
} else if text.ends_with('\n') {
log_state.emit_non_cont_line(level, text);
} else {
log_state.buffer_non_cont(level, text);
}
}

/// Redirect llama.cpp logs into tracing.
pub fn send_logs_to_tracing(options: LogOptions) {
    // TODO: Reinitialize the state to support calling send_logs_to_tracing multiple times.

    // We set up separate log states for llama.cpp and ggml to make sure that CONT logs from the
    // two can't possibly interfere with each other. In other words, if llama.cpp emits a log
    // without a trailing newline and then calls a GGML function, the logs won't be weirdly
    // intermixed: llama.cpp logs will CONT previous llama.cpp logs and GGML logs will CONT
    // previous GGML logs.
    let llama_heap_state = Box::as_ref(
        log::LLAMA_STATE
            .get_or_init(|| Box::new(log::State::new(log::Module::LlamaCpp, options.clone()))),
    ) as *const _;
    let ggml_heap_state = Box::as_ref(
        log::GGML_STATE.get_or_init(|| Box::new(log::State::new(log::Module::GGML, options))),
    ) as *const _;

    unsafe {
        // GGML has to be set after llama since setting llama sets ggml as well.
        llama_cpp_sys_2::llama_log_set(Some(logs_to_trace), llama_heap_state as *mut _);
        llama_cpp_sys_2::ggml_log_set(Some(logs_to_trace), ggml_heap_state as *mut _);
    }
}
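The log::State helpers called above (cont_buffered_log, emit_non_cont_line, buffer_non_cont), the Module enum, and the LLAMA_STATE / GGML_STATE statics live in the new log module, which is among the nine changed files but is not rendered in this excerpt. A rough sketch of the buffering contract that logs_to_trace relies on, with made-up field names, a simplified level mapping, and without the fast-path lock avoidance described in the comments above, might look like the following; the actual log.rs in this commit may differ:

// Illustrative sketch only -- not the log.rs added by this commit. The names below
// (PendingLine, `buffered`) are hypothetical, and the GGML_LOG_LEVEL_* constants are assumed
// to be exposed by llama-cpp-sys-2 in the same bindgen style as GGML_LOG_LEVEL_CONT above.
use std::sync::{Mutex, OnceLock};

use crate::LogOptions;

pub(crate) enum Module {
    LlamaCpp,
    GGML,
}

pub(crate) static LLAMA_STATE: OnceLock<Box<State>> = OnceLock::new();
pub(crate) static GGML_STATE: OnceLock<Box<State>> = OnceLock::new();

struct PendingLine {
    level: llama_cpp_sys_2::ggml_log_level,
    text: String,
}

pub(crate) struct State {
    pub(crate) options: LogOptions,
    #[allow(dead_code)]
    module: Module,
    buffered: Mutex<Option<PendingLine>>,
}

impl State {
    pub(crate) fn new(module: Module, options: LogOptions) -> Self {
        Self { options, module, buffered: Mutex::new(None) }
    }

    // CONT: append to the pending line and flush it once it finally ends with a newline.
    pub(crate) fn cont_buffered_log(&self, text: &str) {
        let mut pending = self.buffered.lock().unwrap();
        if let Some(mut line) = pending.take() {
            line.text.push_str(text);
            if line.text.ends_with('\n') {
                emit(line.level, &line.text);
            } else {
                *pending = Some(line);
            }
        }
    }

    // A complete, newline-terminated message: flush anything still pending, then emit it.
    pub(crate) fn emit_non_cont_line(&self, level: llama_cpp_sys_2::ggml_log_level, text: &str) {
        if let Some(line) = self.buffered.lock().unwrap().take() {
            emit(line.level, &line.text);
        }
        emit(level, text);
    }

    // No trailing newline: hold the message until the next callback reveals whether it is
    // continued via CONT or was simply missing its newline.
    pub(crate) fn buffer_non_cont(&self, level: llama_cpp_sys_2::ggml_log_level, text: &str) {
        *self.buffered.lock().unwrap() = Some(PendingLine { level, text: text.to_owned() });
    }
}

// Map ggml log levels onto tracing macros, trimming the trailing newline that llama.cpp
// format strings carry so tracing does not print blank lines.
fn emit(level: llama_cpp_sys_2::ggml_log_level, text: &str) {
    let text = text.trim_end_matches('\n');
    if level == llama_cpp_sys_2::GGML_LOG_LEVEL_ERROR {
        tracing::error!("{text}");
    } else if level == llama_cpp_sys_2::GGML_LOG_LEVEL_WARN {
        tracing::warn!("{text}");
    } else if level == llama_cpp_sys_2::GGML_LOG_LEVEL_INFO {
        tracing::info!("{text}");
    } else {
        tracing::debug!("{text}");
    }
}

Keeping one State per module is what lets a half-finished llama.cpp line and a half-finished ggml line be buffered independently, as the comment in send_logs_to_tracing explains.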