diff --git a/.ci/options.ini b/.ci/options.ini
index e1ae8b9..76ec00f 100644
--- a/.ci/options.ini
+++ b/.ci/options.ini
@@ -5,8 +5,8 @@
 llamacpp = https://github.com/ggerganov/llama.cpp
 
 [revisions]
 automatic = 2ec6e9eec481223b8abe50cb5165d5fd9be2d086
-comfyui = 29c2e26724d4982a3e33114eb9064f1a11f4f4ed
-llamacpp = b3376
+comfyui = 7914c47d5afb1c5ffab49d665f9a1ac86a458821
+llamacpp = b3400
 
 [files]
diff --git a/airootfs/home/tori/.local/share/tori/patches/0000-automatic-drop-pstate-in-idle.patch b/airootfs/home/tori/.local/share/tori/patches/0000-automatic-drop-pstate-in-idle.patch
index 59138b1..170a4c7 100644
--- a/airootfs/home/tori/.local/share/tori/patches/0000-automatic-drop-pstate-in-idle.patch
+++ b/airootfs/home/tori/.local/share/tori/patches/0000-automatic-drop-pstate-in-idle.patch
@@ -8,7 +8,7 @@
  debug_install = installer.log.debug if os.environ.get('SD_INSTALL_DEBUG', None) is not None else lambda *args, **kwargs: None
 
-@@ -260,4 +261,5 @@ def main():
+@@ -262,4 +263,5 @@ def main():
 
 
  if __name__ == "__main__":
@@ -24,7 +24,7 @@
 
 class State:
 
-@@ -81,6 +82,8 @@ class State:
+@@ -83,6 +84,8 @@ class State:
          self.time_start = time.time()
          if self.debug_output:
              log.debug(f'State begin: {self.job}')
@@ -33,7 +33,7 @@
             modules.devices.torch_gc()
 
     def end(self, api=None):
-@@ -90,6 +93,8 @@ class State:
+@@ -92,6 +95,8 @@ class State:
          self.time_start = time.time()
          if self.debug_output:
              log.debug(f'State end: {self.job} time={time.time() - self.time_start:.2f}')
diff --git a/airootfs/home/tori/.local/share/tori/patches/0000-llamacpp-server-drop-pstate-in-idle.patch b/airootfs/home/tori/.local/share/tori/patches/0000-llamacpp-server-drop-pstate-in-idle.patch
index 54e689c..ec9c947 100644
--- a/airootfs/home/tori/.local/share/tori/patches/0000-llamacpp-server-drop-pstate-in-idle.patch
+++ b/airootfs/home/tori/.local/share/tori/patches/0000-llamacpp-server-drop-pstate-in-idle.patch
@@ -1,6 +1,6 @@
 --- a/examples/server/server.cpp
 +++ b/examples/server/server.cpp
-@@ -1103,6 +1103,7 @@ struct server_context {
+@@ -1106,6 +1106,7 @@ struct server_context {
              {"id_task", slot.id_task},
          });
 
@@ -8,7 +8,7 @@
          return true;
      }
 
-@@ -1910,6 +1911,7 @@ struct server_context {
+@@ -1913,6 +1914,7 @@ struct server_context {
          kv_cache_clear();
      }
 
@@ -16,7 +16,7 @@
          return;
      }
  }
-@@ -2466,6 +2468,7 @@ inline void signal_handler(int signal) {
+@@ -2485,6 +2487,7 @@ inline void signal_handler(int signal) {
  }
 
  int main(int argc, char ** argv) {
diff --git a/airootfs/home/tori/.local/share/tori/patches/0000-vllm-drop-pstate-in-idle.patch b/airootfs/home/tori/.local/share/tori/patches/0000-vllm-drop-pstate-in-idle.patch
index 379fc38..f5300d2 100644
--- a/airootfs/home/tori/.local/share/tori/patches/0000-vllm-drop-pstate-in-idle.patch
+++ b/airootfs/home/tori/.local/share/tori/patches/0000-vllm-drop-pstate-in-idle.patch
@@ -20,7 +20,7 @@
              self.num_generation_tokens.append(stats.num_generation_tokens_iter)
 --- a/vllm/entrypoints/openai/api_server.py
 +++ b/vllm/entrypoints/openai/api_server.py
-@@ -12,6 +12,7 @@ from fastapi import Request
+@@ -12,6 +12,7 @@ from fastapi import APIRouter, Request
  from fastapi.exceptions import RequestValidationError
  from fastapi.middleware.cors import CORSMiddleware
  from fastapi.responses import JSONResponse, Response, StreamingResponse
@@ -28,30 +28,46 @@
  from prometheus_client import make_asgi_app
  from starlette.routing import Mount
 
-@@ -129,6 +130,7 @@ async def show_version():
- @app.post("/v1/chat/completions")
+@@ -121,6 +122,7 @@ async def show_version():
+ @router.post("/v1/chat/completions")
  async def create_chat_completion(request: ChatCompletionRequest,
                                   raw_request: Request):
 +    set_pstate_high()
      generator = await openai_serving_chat.create_chat_completion(
          request, raw_request)
      if isinstance(generator, ErrorResponse):
-@@ -144,6 +146,7 @@ async def create_chat_completion(request: ChatCompletionRequest,
- @app.post("/v1/completions")
+@@ -136,6 +138,7 @@ async def create_chat_completion(request: ChatCompletionRequest,
+ @router.post("/v1/completions")
  async def create_completion(request: CompletionRequest, raw_request: Request):
 +    set_pstate_high()
      generator = await openai_serving_completion.create_completion(
          request, raw_request)
      if isinstance(generator, ErrorResponse):
-@@ -168,6 +171,7 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):
+@@ -273,6 +276,7 @@ def run_server(args, llm_engine=None):
 
 
  if __name__ == "__main__":
 +    set_pstate_low()
-     args = parse_args()
+     # NOTE(simon):
+     # This section should be in sync with vllm/scripts.py for CLI entrypoints.
+     parser = FlexibleArgumentParser(
+--- a/vllm/scripts.py
++++ b/vllm/scripts.py
+@@ -5,6 +5,7 @@ import signal
+ import sys
+ from typing import Optional
 
-     app.add_middleware(
++from nvidia_pstate import set_pstate_high, set_pstate_low
+ from openai import OpenAI
+ 
+ from vllm.entrypoints.openai.api_server import run_server
+@@ -151,4 +152,5 @@ def main():
+ 
+ 
+ if __name__ == "__main__":
++    set_pstate_low()
+     main()
 --- a/vllm/worker/model_runner.py
 +++ b/vllm/worker/model_runner.py
 @@ -10,6 +10,7 @@ import numpy as np
  import torch
  import torch.distributed
 +from nvidia_pstate import set_pstate_high, set_pstate_low
 
  try:
      from flashinfer import BatchDecodeWithPagedKVCacheWrapper
-@@ -903,6 +904,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
+@@ -1003,6 +1004,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
          Since it is used for decoding-only, it assumes there's only 1 token
          per sequence in the batch.
          """
 +        set_pstate_high()
          assert not self.model_config.enforce_eager
          logger.info("Capturing the model for CUDA graphs. This may lead to "
                      "unexpected consequences if the model is not static. To "
-@@ -1098,6 +1100,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
+@@ -1206,6 +1208,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
          elapsed_time = end_time - start_time
          # This usually takes < 10 seconds.
          logger.info("Graph capturing finished in %.0f secs.", elapsed_time)
 +        set_pstate_low()
 --- a/vllm/worker/worker.py
 +++ b/vllm/worker/worker.py
 +from nvidia_pstate import set_pstate_high, set_pstate_low
  from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
                           ModelConfig, MultiModalConfig, ParallelConfig,
-@@ -164,6 +165,7 @@ class Worker(LocalOrDistributedWorkerBase):
+@@ -170,6 +171,7 @@ class Worker(LocalOrDistributedWorkerBase):
          You may limit the usage of GPU memory by adjusting the
          `gpu_memory_utilization` parameter.
          """
 +        set_pstate_high()
          # Profile the memory usage of the model and get the maximum number of
          # cache blocks that can be allocated with the remaining free memory.
          torch.cuda.empty_cache()
-@@ -179,6 +181,8 @@ class Worker(LocalOrDistributedWorkerBase):
+@@ -185,6 +187,8 @@ class Worker(LocalOrDistributedWorkerBase):
          # NOTE(woosuk): Here we assume that the other processes using the same
          # GPU did not change their memory usage during the profiling.
          peak_memory = self.init_gpu_memory - free_gpu_memory
 +
 +        set_pstate_low()
          assert peak_memory > 0, (
              "Error in memory profiling. This happens when the GPU memory was "
              "not properly cleaned up before initializing the vLLM instance.")
-@@ -195,6 +199,7 @@ class Worker(LocalOrDistributedWorkerBase):
+@@ -201,6 +205,7 @@ class Worker(LocalOrDistributedWorkerBase):
              self.model_runner.remove_all_loras()
          gc.collect()
          torch.cuda.empty_cache()
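
All four patches follow the same idle/busy pattern around nvidia_pstate: drop the GPU P-state when the process starts, raise it right before real GPU work (request handling, memory profiling, CUDA graph capture), and drop it again once the work is done. Below is a minimal illustrative sketch of that pattern, not part of the diff; only set_pstate_high() and set_pstate_low() are taken from the patches above, and the gpu_burst() helper is a hypothetical name.

    # Sketch only: the idle-by-default pattern applied by the patches above.
    from contextlib import contextmanager

    from nvidia_pstate import set_pstate_high, set_pstate_low


    @contextmanager
    def gpu_burst():
        """Raise the P-state for the duration of one job, then drop it again."""
        set_pstate_high()
        try:
            yield
        finally:
            set_pstate_low()


    if __name__ == "__main__":
        set_pstate_low()   # idle at startup, as the patched entrypoints do
        with gpu_burst():
            pass           # inference / profiling / graph capture goes here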