diff --git a/airootfs/home/tori/ComfyUI b/airootfs/home/tori/ComfyUI
index eda8704..c6de09b 160000
--- a/airootfs/home/tori/ComfyUI
+++ b/airootfs/home/tori/ComfyUI
@@ -1 +1 @@
-Subproject commit eda87043862f743b0a0467735f8531f7c4709b3a
+Subproject commit c6de09b02e20d748739fb6af58e196ebdd12825a
diff --git a/airootfs/home/tori/automatic b/airootfs/home/tori/automatic
index bc4b633..82973c4 160000
--- a/airootfs/home/tori/automatic
+++ b/airootfs/home/tori/automatic
@@ -1 +1 @@
-Subproject commit bc4b633e8de3b9392595982e41673177dde1333d
+Subproject commit 82973c49ca491b1d50418b00e37131d308fad6b6
diff --git a/airootfs/home/tori/axolotl b/airootfs/home/tori/axolotl
index 0976781..ff939d8 160000
--- a/airootfs/home/tori/axolotl
+++ b/airootfs/home/tori/axolotl
@@ -1 +1 @@
-Subproject commit 0976781e150afabad4e21f42677fb02e1a969280
+Subproject commit ff939d8a644c27cbe42889e772a1fc5502596759
diff --git a/airootfs/home/tori/koboldcpp b/airootfs/home/tori/koboldcpp
index 7a2de82..f3b7651 160000
--- a/airootfs/home/tori/koboldcpp
+++ b/airootfs/home/tori/koboldcpp
@@ -1 +1 @@
-Subproject commit 7a2de82c96906ae7d331ce229948ebcf55601f7c
+Subproject commit f3b7651102c3ce3e4f331b93137dc32d752eada0
diff --git a/airootfs/home/tori/llama.cpp b/airootfs/home/tori/llama.cpp
index 19885d2..b06c16e 160000
--- a/airootfs/home/tori/llama.cpp
+++ b/airootfs/home/tori/llama.cpp
@@ -1 +1 @@
-Subproject commit 19885d205e768579ab090d1e99281cae58c21b54
+Subproject commit b06c16ef9f81d84da520232c125d4d8a1d273736
diff --git a/airootfs/home/tori/text-generation-webui b/airootfs/home/tori/text-generation-webui
index 1934cb6..7cf1402 160000
--- a/airootfs/home/tori/text-generation-webui
+++ b/airootfs/home/tori/text-generation-webui
@@ -1 +1 @@
-Subproject commit 1934cb61ef879815644277c01c7295acbae542d8
+Subproject commit 7cf1402bde48fd76af501d5efecb34227bf4d082
diff --git a/airootfs/home/tori/vllm b/airootfs/home/tori/vllm
index eeab52a..f408d05 160000
--- a/airootfs/home/tori/vllm
+++ b/airootfs/home/tori/vllm
@@ -1 +1 @@
-Subproject commit eeab52a4ff02e15f970880a689df2861ad173770
+Subproject commit f408d05c523c25e2f638a13cb34a2dab3dcb2754
diff --git a/airootfs/root/customize_airootfs/patches/0100-llamacpp-enable-prompt-cache.patch b/airootfs/root/customize_airootfs/patches/0100-llamacpp-enable-prompt-cache.patch
index fbdeff3..978e657 100644
--- a/airootfs/root/customize_airootfs/patches/0100-llamacpp-enable-prompt-cache.patch
+++ b/airootfs/root/customize_airootfs/patches/0100-llamacpp-enable-prompt-cache.patch
@@ -1,6 +1,6 @@
 --- a/examples/server/server.cpp
 +++ b/examples/server/server.cpp
-@@ -94,7 +94,7 @@ struct server_task_multi {
+@@ -95,7 +95,7 @@ struct server_task_multi {
 
  struct slot_params {
      bool stream = true;
@@ -9,7 +9,7 @@
      uint32_t seed = -1; // RNG seed
      int32_t n_keep = 0; // number of tokens to keep from initial prompt
 
-@@ -825,7 +825,7 @@ struct server_context {
+@@ -827,7 +827,7 @@ struct server_context {
          }
 
          slot.params.stream = json_value(data, "stream", false);
@@ -18,14 +18,3 @@
      slot.params.n_predict = json_value(data, "n_predict", default_params.n_predict);
      slot.sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
      slot.sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
---- a/examples/server/utils.hpp
-+++ b/examples/server/utils.hpp
-@@ -353,7 +353,7 @@ static json oaicompat_completion_params_parse(
-     llama_sampling_params default_sparams;
-     llama_params["model"] = json_value(body, "model", std::string("unknown"));
-     llama_params["prompt"] = format_chat(model, chat_template, body["messages"]);
--    llama_params["cache_prompt"] = json_value(body, "cache_prompt", false);
-+    llama_params["cache_prompt"] = json_value(body, "cache_prompt", true);
-     llama_params["temperature"] = json_value(body, "temperature", 0.0);
-     llama_params["top_k"] = json_value(body, "top_k", default_sparams.top_k);
-     llama_params["top_p"] = json_value(body, "top_p", 1.0);
diff --git a/airootfs/root/customize_airootfs/patches/0100-vllm-enable-other-archs.patch b/airootfs/root/customize_airootfs/patches/0100-vllm-enable-other-archs.patch
deleted file mode 100644
index 01fcd42..0000000
--- a/airootfs/root/customize_airootfs/patches/0100-vllm-enable-other-archs.patch
+++ /dev/null
@@ -1,23 +0,0 @@
---- a/setup.py
-+++ b/setup.py
-@@ -23,7 +23,7 @@ ROOT_DIR = os.path.dirname(__file__)
- MAIN_CUDA_VERSION = "12.1"
- 
- # Supported NVIDIA GPU architectures.
--NVIDIA_SUPPORTED_ARCHS = {"7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
-+NVIDIA_SUPPORTED_ARCHS = {"6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
- ROCM_SUPPORTED_ARCHS = {"gfx908", "gfx90a", "gfx942", "gfx1100"}
- # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)
- 
-@@ -222,9 +222,9 @@ if _is_cuda() and not compute_capabilities:
-     device_count = torch.cuda.device_count()
-     for i in range(device_count):
-         major, minor = torch.cuda.get_device_capability(i)
--        if major < 7:
-+        if major < 6:
-             raise RuntimeError(
--                "GPUs with compute capability below 7.0 are not supported.")
-+                "GPUs with compute capability below 6.0 are not supported.")
-         compute_capabilities.add(f"{major}.{minor}")
- 
- ext_modules = []
diff --git a/airootfs/root/customize_airootfs/scripts/0100-vllm-patches.sh b/airootfs/root/customize_airootfs/scripts/0100-vllm-patches.sh
deleted file mode 100644
index 61550e1..0000000
--- a/airootfs/root/customize_airootfs/scripts/0100-vllm-patches.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-set -eu
-
-# vllm patches
-pushd "vllm"
-    # enable other architectures
-    patch -p1 < "$CUSTOMIZE_AIROOTFS/patches/0100-vllm-enable-other-archs.patch"
-popd
diff --git a/airootfs/root/customize_airootfs/scripts/1000-vllm-dependencies.sh.jinja2 b/airootfs/root/customize_airootfs/scripts/1000-vllm-dependencies.sh.jinja2
index 3ac0612..6465f1b 100644
--- a/airootfs/root/customize_airootfs/scripts/1000-vllm-dependencies.sh.jinja2
+++ b/airootfs/root/customize_airootfs/scripts/1000-vllm-dependencies.sh.jinja2
@@ -11,8 +11,8 @@ pushd "vllm"
     export MAX_JOBS=1
 
     # define supported architectures
-    export TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0"
-
+    export TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+
     # cuda home directory
     export CUDA_HOME=/opt/cuda