Skip to content

Commit

Permalink
Update repositories
Browse files Browse the repository at this point in the history
  • Loading branch information
sasha0552 authored Mar 25, 2024
1 parent c14b128 commit de3395b
Show file tree
Hide file tree
Showing 11 changed files with 11 additions and 53 deletions.
2 changes: 1 addition & 1 deletion airootfs/home/tori/automatic
2 changes: 1 addition & 1 deletion airootfs/home/tori/axolotl
2 changes: 1 addition & 1 deletion airootfs/home/tori/llama.cpp
2 changes: 1 addition & 1 deletion airootfs/home/tori/vllm
Submodule vllm updated 247 files
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
-@@ -94,7 +94,7 @@ struct server_task_multi {
+@@ -95,7 +95,7 @@ struct server_task_multi {

struct slot_params {
bool stream = true;
Expand All @@ -9,7 +9,7 @@

uint32_t seed = -1; // RNG seed
int32_t n_keep = 0; // number of tokens to keep from initial prompt
-@@ -825,7 +825,7 @@ struct server_context {
+@@ -827,7 +827,7 @@ struct server_context {
}

slot.params.stream = json_value(data, "stream", false);
Expand All @@ -18,14 +18,3 @@
slot.params.n_predict = json_value(data, "n_predict", default_params.n_predict);
slot.sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
slot.sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -353,7 +353,7 @@ static json oaicompat_completion_params_parse(
llama_sampling_params default_sparams;
llama_params["model"] = json_value(body, "model", std::string("unknown"));
llama_params["prompt"] = format_chat(model, chat_template, body["messages"]);
- llama_params["cache_prompt"] = json_value(body, "cache_prompt", false);
+ llama_params["cache_prompt"] = json_value(body, "cache_prompt", true);
llama_params["temperature"] = json_value(body, "temperature", 0.0);
llama_params["top_k"] = json_value(body, "top_k", default_sparams.top_k);
llama_params["top_p"] = json_value(body, "top_p", 1.0);

This file was deleted.

8 changes: 0 additions & 8 deletions airootfs/root/customize_airootfs/scripts/0100-vllm-patches.sh

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ pushd "vllm"
export MAX_JOBS=1

# define supported architectures
-export TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0"
+export TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX"
# cuda home directory
export CUDA_HOME=/opt/cuda

Expand Down

0 comments on commit de3395b

Please sign in to comment.