diff --git a/airootfs/home/tori/ComfyUI b/airootfs/home/tori/ComfyUI
index eda8704..c6de09b 160000
--- a/airootfs/home/tori/ComfyUI
+++ b/airootfs/home/tori/ComfyUI
@@ -1 +1 @@
-Subproject commit eda87043862f743b0a0467735f8531f7c4709b3a
+Subproject commit c6de09b02e20d748739fb6af58e196ebdd12825a
diff --git a/airootfs/home/tori/automatic b/airootfs/home/tori/automatic
index bc4b633..82973c4 160000
--- a/airootfs/home/tori/automatic
+++ b/airootfs/home/tori/automatic
@@ -1 +1 @@
-Subproject commit bc4b633e8de3b9392595982e41673177dde1333d
+Subproject commit 82973c49ca491b1d50418b00e37131d308fad6b6
diff --git a/airootfs/home/tori/axolotl b/airootfs/home/tori/axolotl
index 0976781..ff939d8 160000
--- a/airootfs/home/tori/axolotl
+++ b/airootfs/home/tori/axolotl
@@ -1 +1 @@
-Subproject commit 0976781e150afabad4e21f42677fb02e1a969280
+Subproject commit ff939d8a644c27cbe42889e772a1fc5502596759
diff --git a/airootfs/home/tori/koboldcpp b/airootfs/home/tori/koboldcpp
index 7a2de82..f3b7651 160000
--- a/airootfs/home/tori/koboldcpp
+++ b/airootfs/home/tori/koboldcpp
@@ -1 +1 @@
-Subproject commit 7a2de82c96906ae7d331ce229948ebcf55601f7c
+Subproject commit f3b7651102c3ce3e4f331b93137dc32d752eada0
diff --git a/airootfs/home/tori/llama.cpp b/airootfs/home/tori/llama.cpp
index 19885d2..b06c16e 160000
--- a/airootfs/home/tori/llama.cpp
+++ b/airootfs/home/tori/llama.cpp
@@ -1 +1 @@
-Subproject commit 19885d205e768579ab090d1e99281cae58c21b54
+Subproject commit b06c16ef9f81d84da520232c125d4d8a1d273736
diff --git a/airootfs/home/tori/text-generation-webui b/airootfs/home/tori/text-generation-webui
index 1934cb6..7cf1402 160000
--- a/airootfs/home/tori/text-generation-webui
+++ b/airootfs/home/tori/text-generation-webui
@@ -1 +1 @@
-Subproject commit 1934cb61ef879815644277c01c7295acbae542d8
+Subproject commit 7cf1402bde48fd76af501d5efecb34227bf4d082
diff --git a/airootfs/home/tori/vllm b/airootfs/home/tori/vllm
index eeab52a..f408d05 160000
--- a/airootfs/home/tori/vllm
+++ b/airootfs/home/tori/vllm
@@ -1 +1 @@
-Subproject commit eeab52a4ff02e15f970880a689df2861ad173770
+Subproject commit f408d05c523c25e2f638a13cb34a2dab3dcb2754
diff --git a/airootfs/root/customize_airootfs/patches/0100-llamacpp-enable-prompt-cache.patch b/airootfs/root/customize_airootfs/patches/0100-llamacpp-enable-prompt-cache.patch
index fbdeff3..978e657 100644
--- a/airootfs/root/customize_airootfs/patches/0100-llamacpp-enable-prompt-cache.patch
+++ b/airootfs/root/customize_airootfs/patches/0100-llamacpp-enable-prompt-cache.patch
@@ -1,6 +1,6 @@
 --- a/examples/server/server.cpp
 +++ b/examples/server/server.cpp
-@@ -94,7 +94,7 @@ struct server_task_multi {
+@@ -95,7 +95,7 @@ struct server_task_multi {
 
  struct slot_params {
      bool stream = true;
@@ -9,7 +9,7 @@
      uint32_t seed = -1; // RNG seed
      int32_t n_keep = 0; // number of tokens to keep from initial prompt
 
-@@ -825,7 +825,7 @@ struct server_context {
+@@ -827,7 +827,7 @@ struct server_context {
          }
 
          slot.params.stream = json_value(data, "stream", false);
@@ -18,14 +18,3 @@
      slot.params.n_predict = json_value(data, "n_predict", default_params.n_predict);
      slot.sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
      slot.sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
---- a/examples/server/utils.hpp
-+++ b/examples/server/utils.hpp
-@@ -353,7 +353,7 @@ static json oaicompat_completion_params_parse(
-     llama_sampling_params default_sparams;
-     llama_params["model"] = json_value(body, "model", std::string("unknown"));
-     llama_params["prompt"] = format_chat(model, chat_template, body["messages"]);
--    llama_params["cache_prompt"] = json_value(body, "cache_prompt", false);
-+    llama_params["cache_prompt"] = json_value(body, "cache_prompt", true);
-     llama_params["temperature"] = json_value(body, "temperature", 0.0);
-     llama_params["top_k"] = json_value(body, "top_k", default_sparams.top_k);
-     llama_params["top_p"] = json_value(body, "top_p", 1.0);
diff --git a/airootfs/root/customize_airootfs/patches/0100-vllm-enable-other-archs.patch b/airootfs/root/customize_airootfs/patches/0100-vllm-enable-other-archs.patch
deleted file mode 100644
index 01fcd42..0000000
--- a/airootfs/root/customize_airootfs/patches/0100-vllm-enable-other-archs.patch
+++ /dev/null
@@ -1,23 +0,0 @@
---- a/setup.py
-+++ b/setup.py
-@@ -23,7 +23,7 @@ ROOT_DIR = os.path.dirname(__file__)
- MAIN_CUDA_VERSION = "12.1"
- 
- # Supported NVIDIA GPU architectures.
--NVIDIA_SUPPORTED_ARCHS = {"7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
-+NVIDIA_SUPPORTED_ARCHS = {"6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
- ROCM_SUPPORTED_ARCHS = {"gfx908", "gfx90a", "gfx942", "gfx1100"}
- # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)
- 
-@@ -222,9 +222,9 @@ if _is_cuda() and not compute_capabilities:
-     device_count = torch.cuda.device_count()
-     for i in range(device_count):
-         major, minor = torch.cuda.get_device_capability(i)
--        if major < 7:
-+        if major < 6:
-             raise RuntimeError(
--                "GPUs with compute capability below 7.0 are not supported.")
-+                "GPUs with compute capability below 6.0 are not supported.")
-         compute_capabilities.add(f"{major}.{minor}")
- 
- ext_modules = []
diff --git a/airootfs/root/customize_airootfs/scripts/0100-vllm-patches.sh b/airootfs/root/customize_airootfs/scripts/0100-vllm-patches.sh
deleted file mode 100644
index 61550e1..0000000
--- a/airootfs/root/customize_airootfs/scripts/0100-vllm-patches.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-set -eu
-
-# vllm patches
-pushd "vllm"
-    # enable other architectures
-    patch -p1 < "$CUSTOMIZE_AIROOTFS/patches/0100-vllm-enable-other-archs.patch"
-popd
diff --git a/airootfs/root/customize_airootfs/scripts/1000-vllm-dependencies.sh.jinja2 b/airootfs/root/customize_airootfs/scripts/1000-vllm-dependencies.sh.jinja2
index 3ac0612..6465f1b 100644
--- a/airootfs/root/customize_airootfs/scripts/1000-vllm-dependencies.sh.jinja2
+++ b/airootfs/root/customize_airootfs/scripts/1000-vllm-dependencies.sh.jinja2
@@ -11,8 +11,8 @@ pushd "vllm"
     export MAX_JOBS=1
 
     # define supported architectures
-    export TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0"
-
+    export TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+
     # cuda home directory
     export CUDA_HOME=/opt/cuda