Skip to content

Commit

Permalink
Merge branch 'main' into gpu-support
Browse files Browse the repository at this point in the history
  • Loading branch information
gaby authored Feb 24, 2024
2 parents 235d65c + e1f966a commit 9dc8f42
Show file tree
Hide file tree
Showing 9 changed files with 204 additions and 98 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,13 @@ Instructions for setting up Serge on Kubernetes can be found in the [wiki](https
| **Alfred** | 40B-1023 |
| **Code** | 13B, 33B |
| **CodeLLaMA** | 7B, 7B-Instruct, 7B-Python, 13B, 13B-Instruct, 13B-Python, 34B, 34B-Instruct, 34B-Python |
| **Gemma** | 2B, 7B |
| **Falcon** | 7B, 7B-Instruct, 40B, 40B-Instruct |
| **LLaMA 2** | 7B, 7B-Chat, 7B-Coder, 13B, 13B-Chat, 70B, 70B-Chat, 70B-OASST |
| **LLaMA Pro** | 8B, 8B-Instruct |
| **Med42** | 70B |
| **Medalpaca** | 13B |
| **Medicine-LLM** | 13B |
| **Medicine** | Chat, LLM, LLM-13B |
| **Meditron** | 7B, 7B-Chat, 70B |
| **Mistral** | 7B-V0.1, 7B-Instruct-v0.2, 7B-OpenOrca |
| **MistralLite** | 7B |
Expand All @@ -79,6 +81,7 @@ Instructions for setting up Serge on Kubernetes can be found in the [wiki](https
| **Python Code** | 13B, 33B |
| **PsyMedRP** | 13B-v1, 20B-v1 |
| **Starling LM** | 7B-Alpha |
| **TinyLlama** | 1.1B |
| **Vicuna** | 7B-v1.5, 13B-v1.5, 33B-v1.3, 33B-Coder |
| **WizardLM** | 7B-v1.0, 13B-v1.2, 70B-v1.0 |
| **Zephyr** | 3B, 7B-Alpha, 7B-Beta |
Expand Down
18 changes: 9 additions & 9 deletions api/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ pydantic = "^1.10.14"
sse-starlette = "^1.8.2"
starlette = "^0.26.1"
typing-extensions = "^4.9.0"
urllib3 = "^2.2.0"
urllib3 = "^2.2.1"
fastapi = "^0.95.1"
huggingface-hub = "^0.20.3"
requests = "^2.31.0"
langchain = "^0.0.180"
loguru = "^0.7.2"
redis = {extras = ["hiredis"], version = "^5.0.1"}
pytest = "^8.0.0"
pytest = "^8.0.1"
hypercorn = {extras = ["trio"], version = "^0.16.0"}

[tool.ruff]
Expand Down
98 changes: 95 additions & 3 deletions api/src/serge/data/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,33 @@
}
]
},
{
"name": "Gemma",
"models": [
{
"name": "Gemma-2B",
"repo": "MaziyarPanahi/gemma-2b-GGUF",
"files": [
{
"name": "q4_K_M",
"filename": "gemma-2b.Q4_K_M.gguf",
"disk_space": 1495245728.0
}
]
},
{
"name": "Gemma-7B",
"repo": "MaziyarPanahi/gemma-7b-GGUF",
"files": [
{
"name": "q4_K_M",
"filename": "gemma-7b.Q4_K_M.gguf",
"disk_space": 5127231648.0
}
]
}
]
},
{
"name": "LLaMA_2",
"models": [
Expand Down Expand Up @@ -287,6 +314,33 @@
]
}
]
},
{
"name": "LLaMA-Pro",
"models": [
{
"name": "Llama-Pro-8B",
"repo": "TheBloke/LLaMA-Pro-8B-GGUF",
"files": [
{
"name": "q4_K_M",
"filename": "llama-pro-8b.Q4_K_M.gguf",
"disk_space": 5055758336.0
}
]
},
{
"name": "Llama-Pro-8B-Instruct",
"repo": "TheBloke/LLaMA-Pro-8B-Instruct-GGUF",
"files": [
{
"name": "q4_K_M",
"filename": "llama-pro-8b-instruct.Q4_K_M.gguf",
"disk_space": 5055758688.0
}
]
}
]
},
{
"name": "Med42",
Expand Down Expand Up @@ -321,10 +375,32 @@
]
},
{
"name": "medicine-LLM",
"name": "Medicine",
"models": [
{
"name": "Medicine LLM 13B",
"name": "Medicine-Chat",
"repo": "TheBloke/medicine-chat-GGUF",
"files": [
{
"name": "q4_K_M",
"filename": "medicine-chat.Q4_K_M.gguf",
"disk_space": 4081010048.0
}
]
},
{
"name": "Medicine-LLM",
"repo": "TheBloke/medicine-LLM-GGUF",
"files": [
{
"name": "q4_K_M",
"filename": "medicine-llm.Q4_K_M.gguf",
"disk_space": 4081009920.0
}
]
},
{
"name": "Medicine-LLM-13B",
"repo": "TheBloke/medicine-LLM-13B-GGUF",
"files": [
{
Expand All @@ -335,7 +411,7 @@
]
}
]
},
},
{
"name": "Meditron",
"models": [
Expand Down Expand Up @@ -696,6 +772,22 @@
]
}
]
},
{
"name": "Tinyllama",
"models": [
{
"name": "Tinyllama-1.1B-Chat-v1.0",
"repo": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
"files": [
{
"name": "q4_K_M",
"filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
"disk_space": 668788096.0
}
]
}
]
},
{
"name": "Vicuna",
Expand Down
7 changes: 6 additions & 1 deletion scripts/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ if [ "$cpu_arch" = "aarch64" ]; then
else
# Use @smartappli provided wheels
cpu_feature=$(detect_cpu_features)

if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cu122"
elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
Expand Down Expand Up @@ -56,7 +57,11 @@ redis_process=$!
# Start the API
cd /usr/src/app/api || exit 1
hypercorn_cmd="hypercorn src.serge.main:app --bind 0.0.0.0:8008"
[ "$SERGE_ENABLE_IPV6" = true ] && hypercorn_cmd+=" --bind [::]:8008"
if [ "$SERGE_ENABLE_IPV6" = true ] && [ "$SERGE_ENABLE_IPV4" != true ]; then
hypercorn_cmd="hypercorn src.serge.main:app --bind [::]:8008"
elif [ "$SERGE_ENABLE_IPV4" = true ] && [ "$SERGE_ENABLE_IPV6" = true ]; then
hypercorn_cmd="hypercorn src.serge.main:app --bind 0.0.0.0:8008 --bind [::]:8008"
fi

$hypercorn_cmd || {
echo 'Failed to start main app'
Expand Down
9 changes: 7 additions & 2 deletions scripts/dev.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ if [ "$cpu_arch" = "aarch64" ]; then
else
# Use @smartappli provided wheels
cpu_feature=$(detect_cpu_features)

if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cu122"
elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
Expand Down Expand Up @@ -64,8 +65,12 @@ npm run dev -- --host 0.0.0.0 --port 8008 &

# Start the API
cd /usr/src/app/api || exit 1
hypercorn_cmd="hypercorn src.serge.main:api_app --reload --bind 0.0.0.0:9124"
[ "$SERGE_ENABLE_IPV6" = true ] && hypercorn_cmd+=" --bind [::]:9124"
hypercorn_cmd="hypercorn src.serge.main:api_app --bind 0.0.0.0:9124"
if [ "$SERGE_ENABLE_IPV6" = true ] && [ "$SERGE_ENABLE_IPV4" != true ]; then
hypercorn_cmd="hypercorn src.serge.main:api_app --bind [::]:9124"
elif [ "$SERGE_ENABLE_IPV4" = true ] && [ "$SERGE_ENABLE_IPV6" = true ]; then
hypercorn_cmd="hypercorn src.serge.main:api_app --bind 0.0.0.0:9124 --bind [::]:9124"
fi

$hypercorn_cmd || {
echo 'Failed to start main app'
Expand Down
3 changes: 2 additions & 1 deletion scripts/serge.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
SERGE_GPU_NVIDIA_SUPPORT=false
SERGE_GPU_AMD_SUPPORT=false
LLAMA_PYTHON_VERSION=0.2.44
LLAMA_PYTHON_VERSION=0.2.50
SERGE_ENABLE_IPV4=true
SERGE_ENABLE_IPV6=false
Loading

0 comments on commit 9dc8f42

Please sign in to comment.