Skip to content

Commit

Permalink
Remove deprecated self extending context window from server example
Browse files — browse the repository at this point in the history
  • Loading branch information
countzero committed Oct 14, 2024
1 parent c894c4d commit 1fb472e
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 28 deletions.
59 changes: 32 additions & 27 deletions examples/server.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -297,39 +297,44 @@ if ($numberOfGPULayers -lt 0) {
$numberOfGPULayers = 0
}

# We are automatically using the self extending context window
# on models that have a trained context window < context size.
# https://arxiv.org/abs/2401.01325
# https://github.com/ggerganov/llama.cpp/issues/4886#issuecomment-1890465266
# Factor 1 / width 512 are the neutral llama.cpp defaults, i.e. group
# attention disabled; they are only overridden below when the requested
# context size exceeds the model's trained context length.
$groupAttentionFactor = 1
$groupAttentionWidth = 512
# Announce the server endpoint before launching so the user sees the URL immediately.
Write-Host "Starting llama.cpp server with custom options at http://127.0.0.1:${port}..." -ForegroundColor "Yellow"

if ($contextSize -gt $modelContextLength) {
# Path to the llama-server executable inside the local llama.cpp release build tree.
# NOTE(review): assumes $llamaCppPath has no trailing backslash — confirm upstream.
$commandBinary = "${llamaCppPath}\build\bin\Release\llama-server"

# Base set of llama-server CLI options; conditional flags (chat template,
# flash attention, verbosity) are appended further down. Each entry is a
# single "--flag value" string because the final command is assembled as
# one multiline string rather than an argument vector.
$commandArguments = @()
$commandArguments += "--n-predict 1024"
$commandArguments += "--port '${port}'"
$commandArguments += "--model '${model}'"
$commandArguments += "--alias '${alias}'"
$commandArguments += "--ctx-size '${contextSize}'"
$commandArguments += "--threads '${numberOfPhysicalCores}'"
$commandArguments += "--n-gpu-layers '${numberOfGPULayers}'"
$commandArguments += "--parallel '${parallel}'"
$commandArguments += "--cache-type-k '${kvCacheDataType}'"
$commandArguments += "--cache-type-v '${kvCacheDataType}'"

# Inform the user that the context window is being self-extended via group attention.
Write-Host "Self extending context window from ${modelContextLength} to ${contextSize}..." -ForegroundColor "Yellow"
# Optionally override the model's built-in chat template.
if ($chatTemplate) { $commandArguments += "--chat-template '${chatTemplate}'" }

# Derive the group attention parameters for self-extend from the requested
# context size and the model's trained context length.
# NOTE(review): PowerShell's '/' on two integers yields a [double] when the
# division is not even — presumably both divisions are exact here; confirm.
$groupAttentionFactor = $contextSize / $modelContextLength
$groupAttentionWidth = $modelContextLength / 2
# Enable flash attention when the caller requested it.
if ($enableFlashAttention) { $commandArguments += "--flash-attn" }

# Announce the server endpoint before launching so the user sees the URL immediately.
Write-Host "Starting llama.cpp server with custom options at http://127.0.0.1:${port}..." -ForegroundColor "Yellow"
# Pass verbose logging through to llama-server when requested.
if ($verbose) { $commandArguments += "--verbose" }

# Pretty-print the argument list for a multiline invocation string: every
# argument is followed by a backtick line continuation except the last one,
# which terminates the command.
#
# The previous implementation detected the last element with IndexOf($_),
# which returns the FIRST occurrence of a value — any duplicated argument
# string would have broken the formatting. Iterating by positional index
# is unambiguous (and avoids the O(n^2) repeated scans).
$formattedArguments = @()
for ($index = 0; $index -lt $commandArguments.Count; $index++) {
    if ($index -lt $commandArguments.Count - 1) {
        $formattedArguments += " $($commandArguments[$index]) ```n"
    } else {
        $formattedArguments += " $($commandArguments[$index]) `n"
    }
}
$commandArguments = $formattedArguments

# Build the full launch command as a single multiline string: each embedded
# line ends in an escaped backtick (line continuation) and the $( ) sub-
# expressions splice in the optional flags.
# NOTE(review): when an optional flag is absent its subexpression yields an
# empty continuation line — presumably harmless to the shell; confirm.
$command = "${llamaCppPath}\build\bin\Release\llama-server ``
--n-predict 1024 ``
--port '${port}' ``
--model '${model}' ``
--alias '${alias}' ``
--ctx-size '${contextSize}' ``
--threads '${numberOfPhysicalCores}' ``
--n-gpu-layers '${numberOfGPULayers}' ``
--parallel '${parallel}' ``
--grp-attn-n '${groupAttentionFactor}' ``
--grp-attn-w '${groupAttentionWidth}' ``
--cache-type-k '${kvCacheDataType}' ``
--cache-type-v '${kvCacheDataType}' ``
$(if ($enableFlashAttention) {"--flash-attn"}) ``
$(if ($chatTemplate) {"--chat-template '${chatTemplate}'"}) ``
$(if ($verbose) {"--verbose"})"
# Concatenating a string with an array makes PowerShell expand the array,
# joining its elements with $OFS (a single space by default), which yields
# one multiline command string ready to display/invoke.
$command = $commandBinary + " ```n " + $commandArguments

# Echo the fully-assembled command so the user can audit or copy it.
Write-Host $command -ForegroundColor "Green"

Expand Down
2 changes: 1 addition & 1 deletion vendor/llama.cpp
Submodule llama.cpp updated 158 files

0 comments on commit 1fb472e

Please sign in to comment.