Update repositories (#28)
* Update repositories

* fix

* fixes
sasha0552 authored Mar 14, 2024
1 parent f4aca5b commit 864a075
Showing 11 changed files with 40 additions and 63 deletions.
2 changes: 1 addition & 1 deletion airootfs/home/tori/automatic
2 changes: 1 addition & 1 deletion airootfs/home/tori/axolotl
2 changes: 1 addition & 1 deletion airootfs/home/tori/koboldcpp
Submodule koboldcpp updated 90 files
+46 −1 .github/workflows/server.yml
+1 −0 .gitignore
+7 −1 CMakeLists.txt
+22 −14 Makefile
+1 −1 class.py
+22 −10 colab.ipynb
+6 −1 common/CMakeLists.txt
+25 −16 common/common.cpp
+9 −3 common/common.h
+16 −0 common/grammar-parser.cpp
+4 −4 common/log.h
+79 −0 common/sampling.cpp
+7 −0 common/sampling.h
+121 −0 convert-hf-to-gguf.py
+0 −13 convert.py
+1 −0 examples/CMakeLists.txt
+8 −5 examples/batched-bench/batched-bench.cpp
+2 −1 examples/batched/batched.cpp
+2 −4 examples/benchmark/benchmark-matmult.cpp
+20 −18 examples/embedding/embedding.cpp
+5 −0 examples/gritlm/CMakeLists.txt
+229 −0 examples/gritlm/gritlm.cpp
+27 −3 examples/llama-bench/llama-bench.cpp
+51 −2 examples/llama.android/app/src/main/cpp/llama-android.cpp
+2 −2 examples/llama.android/app/src/main/java/com/example/llama/Llm.kt
+1 −54 examples/llava/clip.cpp
+30 −0 examples/llava/quantclip.cpp
+21 −0 examples/main/main.cpp
+13 −7 examples/parallel/parallel.cpp
+97 −51 examples/perplexity/perplexity.cpp
+34 −0 examples/server-embd.py
+8 −2 examples/server/CMakeLists.txt
+81 −9 examples/server/README.md
+88 −0 examples/server/bench/README.md
+120 −0 examples/server/bench/script.js
+235 −234 examples/server/completion.js.hpp
+0 −225 examples/server/oai.hpp
+10 −10 examples/server/public/completion.js
+2,085 −2,067 examples/server/server.cpp
+94 −0 examples/server/tests/features/embeddings.feature
+47 −19 examples/server/tests/features/environment.py
+3 −48 examples/server/tests/features/parallel.feature
+3 −2 examples/server/tests/features/security.feature
+30 −38 examples/server/tests/features/server.feature
+201 −100 examples/server/tests/features/steps/steps.py
+1 −0 examples/server/tests/requirements.txt
+358 −396 examples/server/utils.hpp
+1 −0 examples/speculative/README.md
+168 −48 examples/speculative/speculative.cpp
+1 −0 expose.cpp
+6 −0 expose.h
+4 −3 ggml-backend-impl.h
+18 −21 ggml-backend.c
+16 −15 ggml-backend.h
+1,830 −0 ggml-common.h
+107 −1,059 ggml-cuda.cu
+6 −2 ggml-impl.h
+2 −2 ggml-kompute.cpp
+68 −12 ggml-metal.m
+90 −910 ggml-metal.metal
+2 −2 ggml-opencl.cpp
+472 −1,089 ggml-quants.c
+27 −259 ggml-quants.h
+1,939 −448 ggml-sycl.cpp
+39,336 −43,461 ggml-vulkan-shaders.hpp
+1,286 −772 ggml-vulkan.cpp
+1 −0 ggml-vulkan.h
+507 −349 ggml.c
+43 −22 ggml.h
+712 −459 ggml_vk_generate_shaders.py
+41 −0 gguf-py/gguf/constants.py
+12 −0 gguf-py/gguf/gguf_writer.py
+44 −2 gguf-py/gguf/tensor_mapping.py
+237 −15 gpttype_adapter.cpp
+64 −2 kcpp_docs.embd
+596 −165 klite.embd
+189 −39 koboldcpp.py
+1,038 −277 llama.cpp
+25 −13 llama.h
+4 −0 model_adapter.cpp
+1 −0 model_adapter.h
+6 −0 otherarch/ggml_v3.c
+7 −0 otherarch/otherarch.h
+9 −6 otherarch/sdcpp/model.cpp
+23 −10 otherarch/sdcpp/sdtype_adapter.cpp
+0 −7,987 otherarch/sdcpp/thirdparty/stb_image.h
+17 −4 otherarch/sdcpp/util.cpp
+3 −1 otherarch/sdcpp/util.h
+1,672 −0 unicode.cpp
+16 −774 unicode.h
2 changes: 1 addition & 1 deletion airootfs/home/tori/llama.cpp
2 changes: 1 addition & 1 deletion airootfs/home/tori/text-generation-webui
Submodule text-generation-webui updated 52 files
+5 −1 Colab-TextGen-GPU.ipynb
+15 −10 README.md
+6 −4 docs/04 - Model Tab.md
+0 −6 docs/12 - OpenAI API.md
+4 −4 extensions/openai/completions.py
+0 −4 extensions/openai/requirements.txt
+12 −40 extensions/superbooga/chromadb.py
+1 −1 extensions/superbooga/requirements.txt
+11 −22 extensions/superboogav2/api.py
+7 −7 extensions/superboogav2/benchmark.py
+16 −15 extensions/superboogav2/chat_handler.py
+57 −85 extensions/superboogav2/chromadb.py
+19 −21 extensions/superboogav2/data_preprocessor.py
+13 −12 extensions/superboogav2/data_processor.py
+3 −2 extensions/superboogav2/download_urls.py
+3 −4 extensions/superboogav2/notebook_handler.py
+14 −12 extensions/superboogav2/optimize.py
+5 −7 extensions/superboogav2/parameters.py
+2 −2 extensions/superboogav2/requirements.txt
+20 −22 extensions/superboogav2/script.py
+2 −1 extensions/superboogav2/utils.py
+114 −0 modules/cache_utils.py
+4 −4 modules/chat.py
+3 −0 modules/exllamav2.py
+5 −0 modules/exllamav2_hf.py
+1 −1 modules/extensions.py
+22 −0 modules/llama_cpp_python_hijack.py
+6 −0 modules/loaders.py
+3 −0 modules/shared.py
+7 −0 modules/text_generation.py
+3 −0 modules/ui.py
+4 −2 modules/ui_chat.py
+4 −1 modules/ui_model_menu.py
+1 −1 modules/ui_parameters.py
+187 −94 one_click.py
+31 −26 requirements.txt
+19 −10 requirements_amd.txt
+17 −8 requirements_amd_noavx2.txt
+14 −7 requirements_apple_intel.txt
+16 −9 requirements_apple_silicon.txt
+12 −5 requirements_cpu_only.txt
+12 −5 requirements_cpu_only_noavx2.txt
+31 −26 requirements_noavx2.txt
+8 −1 requirements_nowheels.txt
+3 −0 start_linux.sh
+3 −0 start_macos.sh
+18 −1 start_windows.bat
+1 −1 update_wizard_linux.sh
+1 −1 update_wizard_macos.sh
+1 −1 update_wizard_windows.bat
+1 −1 update_wizard_wsl.bat
+4 −1 wsl.sh
2 changes: 1 addition & 1 deletion airootfs/home/tori/vllm
Submodule vllm updated 147 files
@@ -8,27 +8,35 @@

sampler_order_max = 7
stop_token_max = 16
-@@ -380,6 +381,7 @@ def load_model(model_filename):
+@@ -397,6 +398,7 @@ def load_model(model_filename):
return ret

-def generate(prompt, memory="", max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}):
+def generate(prompt, memory="", images=[], max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}):
+ set_pstate_high()
global maxctx, args, currentusergenkey, totalgens, pendingabortkey
inputs = generation_inputs()
inputs.prompt = prompt.encode("UTF-8")
-@@ -478,6 +480,7 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu
+@@ -489,6 +491,7 @@ def generate(prompt, memory="", images=[], max_length=32, max_context_length=512
if pendingabortkey!="" and pendingabortkey==genkey:
print(f"\nDeferred Abort for GenKey: {pendingabortkey}")
pendingabortkey = ""
+ set_pstate_low()
return ""
else:
ret = handle.generate(inputs)
+@@ -500,6 +503,7 @@ def generate(prompt, memory="", images=[], max_length=32, max_context_length=512
+sindex = outstr.find(trim_str)
+if sindex != -1 and trim_str!="":
+outstr = outstr[:sindex]
++ set_pstate_low()
+return outstr


-@@ -2545,6 +2548,7 @@ def sanitize_string(input_string):
+@@ -2679,6 +2683,7 @@ def sanitize_string(input_string):
return sanitized_string

def main(launch_args,start_server=True):
+ set_pstate_low()
-global args, friendlymodelname, friendlysdmodelname, fullsdmodelpath
+global args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password
args = launch_args
embedded_kailite = None
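
The hunks above are the heart of tori's power management: set_pstate_high() runs as soon as a generation starts, and set_pstate_low() runs on every exit path (the deferred abort, the trimmed-output return, and once at startup in main()). A minimal sketch of the same pattern, assuming the helpers come from a package such as sasha0552's nvidia-pstate (the import path is an assumption; the patch itself only shows the call sites):

# Sketch of the pattern the patch applies, under the assumption that
# set_pstate_high()/set_pstate_low() come from the nvidia_pstate package.
from nvidia_pstate import set_pstate_high, set_pstate_low

def generate_with_pstate(generate_fn, *args, **kwargs):
    set_pstate_high()      # leave the idle P-state before inference
    try:
        return generate_fn(*args, **kwargs)
    finally:
        set_pstate_low()   # always drop back, even on abort or error

The patch inlines set_pstate_low() before each return rather than wrapping the call in try/finally, which keeps the diff small but means every exit path upstream adds must be patched again on rebase, which is exactly the churn this commit resolves.
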
@@ -1,17 +1,6 @@
---- a/examples/server/oai.hpp
-+++ b/examples/server/oai.hpp
-@@ -33,7 +33,7 @@ inline static json oaicompat_completion_params_parse(
-llama_sampling_params default_sparams;
-llama_params["model"] = json_value(body, "model", std::string("unknown"));
-llama_params["prompt"] = format_chat(model, chat_template, body["messages"]);
--- llama_params["cache_prompt"] = json_value(body, "cache_prompt", false);
-+ llama_params["cache_prompt"] = json_value(body, "cache_prompt", true);
-llama_params["temperature"] = json_value(body, "temperature", 0.0);
-llama_params["top_k"] = json_value(body, "top_k", default_sparams.top_k);
-llama_params["top_p"] = json_value(body, "top_p", 1.0);
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
-@@ -68,7 +68,7 @@ enum slot_command {
+@@ -94,7 +94,7 @@ struct server_task_multi {

struct slot_params {
bool stream = true;
@@ -20,12 +9,23 @@

uint32_t seed = -1; // RNG seed
int32_t n_keep = 0; // number of tokens to keep from initial prompt
-@@ -552,7 +552,7 @@ struct llama_server_context
+@@ -825,7 +825,7 @@ struct server_context {
}

-slot->params.stream = json_value(data, "stream", false);
--- slot->params.cache_prompt = json_value(data, "cache_prompt", false);
--+ slot->params.cache_prompt = json_value(data, "cache_prompt", true);
-slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
-slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
-slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
+slot.params.stream = json_value(data, "stream", false);
+- slot.params.cache_prompt = json_value(data, "cache_prompt", false);
++ slot.params.cache_prompt = json_value(data, "cache_prompt", true);
+slot.params.n_predict = json_value(data, "n_predict", default_params.n_predict);
+slot.sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
+slot.sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
+--- a/examples/server/utils.hpp
++++ b/examples/server/utils.hpp
+@@ -353,7 +353,7 @@ static json oaicompat_completion_params_parse(
+llama_sampling_params default_sparams;
+llama_params["model"] = json_value(body, "model", std::string("unknown"));
+llama_params["prompt"] = format_chat(model, chat_template, body["messages"]);
+- llama_params["cache_prompt"] = json_value(body, "cache_prompt", false);
++ llama_params["cache_prompt"] = json_value(body, "cache_prompt", true);
+llama_params["temperature"] = json_value(body, "temperature", 0.0);
+llama_params["top_k"] = json_value(body, "top_k", default_sparams.top_k);
+llama_params["top_p"] = json_value(body, "top_p", 1.0);

This file was deleted.

@@ -18,16 +18,13 @@ pushd "text-generation-webui"

{% if ROCm %}
# extract pytorch version
-index_url=$(grep -o 'https://download.pytorch.org/whl/rocm[0-9.]*' one_click.py)
+index_url=$(grep -m1 -o 'https://download.pytorch.org/whl/rocm[0-9.]*' one_click.py)

# install pytorch
pip3 install torch torchvision torchaudio --index-url "$index_url"

# install dependencies (rocm)
pip3 install -r requirements_amd.txt
{% endif %}
-
-# openai extensions dependencies
-pip3 install -r extensions/openai/requirements.txt
deactivate
popd
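
The added -m1 makes grep stop at the first match, presumably guarding against one_click.py containing the ROCm index URL more than once: grep -o prints each match on its own line, and a multi-line $index_url would break the pip3 call below it. A rough Python equivalent of the extraction (same file name and pattern as the script above):

import re

# First ROCm wheel-index URL in one_click.py, mirroring `grep -m1 -o`.
with open("one_click.py") as f:
    match = re.search(r"https://download\.pytorch\.org/whl/rocm[0-9.]*", f.read())
index_url = match.group(0) if match else None
print(index_url)
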
