Skip to content

Commit

Permalink
Add deepseek-r1 examples
Browse files Browse the repository at this point in the history
Add deepseek-r1 examples

Fix newline check
  • Loading branch information
Bihan Rana authored and Bihan Rana committed Jan 27, 2025
1 parent 5b2b22e commit a453997
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 8 deletions.
15 changes: 15 additions & 0 deletions examples/.dstack-task.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# dstack dev-environment example: provisions a cloud GPU instance and opens
# a VS Code session connected to it.
# NOTE(review): the file is named `.dstack-task.yml` but declares
# `type: dev-environment` — confirm the filename/type pairing is intentional.
type: dev-environment
# The name is optional, if not specified, generated randomly
name: vscode

# Python version used for dstack's default image
python: "3.11"
# Uncomment to use a custom Docker image
#image: dstackai/base:py3.13-0.6-cuda-12.1
ide: vscode

# Uncomment to leverage spot instances
#spot_policy: auto

resources:
  # Any GPU with at least 24GB of memory
  gpu: 24GB
36 changes: 28 additions & 8 deletions examples/.dstack.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,35 @@
type: dev-environment
# The name is optional, if not specified, generated randomly
name: vscode
type: service
name: llama31

python: "3.11"
# Uncomment to use a custom Docker image
#image: dstackai/base:py3.13-0.6-cuda-12.1
env:
- HF_TOKEN
- MODEL_ID=meta-llama/Llama-3.2-1B
- MAX_MODEL_LEN=4096
commands:
- pip install vllm
  - curl -L -o simple_chat_template.jinja https://raw.githubusercontent.com/Bihan/vllm/main/examples/simple_chat_template.jinja
- vllm serve $MODEL_ID
--max-model-len $MAX_MODEL_LEN
--chat-template simple_chat_template.jinja

ide: vscode
auth: false

# Use either spot or on-demand instances
spot_policy: auto
port: 8000
# Register the model
model:
name: meta-llama/Llama-3.2-1B
type: chat
format: openai

# Uncomment to leverage spot instances
#spot_policy: auto

# Uncomment to cache downloaded models
#volumes:
# - /root/.cache/huggingface/hub:/root/.cache/huggingface/hub

resources:
gpu: 24GB
# Uncomment if using multiple GPUs
#shm_size: 24GB
21 changes: 21 additions & 0 deletions examples/llms/deepseek/sglang/amd/.dstack.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# dstack service: serve DeepSeek-R1-Distill-Llama-70B with SGLang on an AMD MI300X.
type: service
name: deepseek-r1-amd

# ROCm build of SGLang; pinned tag for reproducibility
image: lmsysorg/sglang:v0.4.1.post4-rocm620
env:
  - MODEL_ID=deepseek-ai/DeepSeek-R1-Distill-Llama-70B
commands:
  # Multi-line plain scalar: folds into a single launch command
  - python3 -m sglang.launch_server
    --model-path $MODEL_ID
    --port 8000
    --trust-remote-code

port: 8000
# Register the model with dstack's OpenAI-compatible gateway
model:
  name: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
  type: chat
  format: openai

resources:
  gpu: mi300x
  # Fixed: was "300Gb" — dstack size units are written as GB ("Gb" reads as gigabits)
  disk: 300GB
22 changes: 22 additions & 0 deletions examples/llms/deepseek/sglang/nvidia/.dstack.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# dstack service: serve DeepSeek-R1-Distill-Llama-8B with SGLang on an NVIDIA GPU.
type: service
name: deepseek-r1-nvidia

# NOTE(review): `latest` is unpinned — consider a fixed tag for reproducibility
image: lmsysorg/sglang:latest
env:
  - MODEL_ID=deepseek-ai/DeepSeek-R1-Distill-Llama-8B
commands:
  # Multi-line plain scalar: folds into a single launch command
  - python3 -m sglang.launch_server
    --model-path $MODEL_ID
    --port 8000
    --trust-remote-code

port: 8000

# Register the model with dstack's OpenAI-compatible gateway
model:
  name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
  type: chat
  format: openai

resources:
  # Any GPU with at least 24GB of memory
  gpu: 24GB
24 changes: 24 additions & 0 deletions examples/llms/deepseek/vllm/amd/.dstack.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# dstack service: serve DeepSeek-R1-Distill-Llama-70B with vLLM on an AMD MI300X.
type: service
name: deepseek-r1-amd

# ROCm build of vLLM (0.6.4) — vLLM is already installed in this image
image: rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4
env:
  - MODEL_ID=deepseek-ai/DeepSeek-R1-Distill-Llama-70B
  - MAX_MODEL_LEN=4096

commands:
  # Fixed: removed `pip install vllm` — it would fetch the CUDA wheel from PyPI
  # and overwrite the ROCm build shipped in this image.
  - vllm serve $MODEL_ID
    --max-model-len $MAX_MODEL_LEN

port: 8000

# Register the model with dstack's OpenAI-compatible gateway
model:
  name: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
  type: chat
  format: openai

resources:
  gpu: mi300x
  # Fixed: was "300Gb" — dstack size units are written as GB
  disk: 300GB
21 changes: 21 additions & 0 deletions examples/llms/deepseek/vllm/nvidia/.dstack.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# dstack service: serve DeepSeek-R1-Distill-Llama-8B with vLLM on an NVIDIA GPU.
type: service
name: deepseek-r1-nvidia

# Official vLLM OpenAI-server image — vLLM is preinstalled.
# NOTE(review): `latest` is unpinned — consider a fixed tag for reproducibility.
image: vllm/vllm-openai:latest
env:
  - MODEL_ID=deepseek-ai/DeepSeek-R1-Distill-Llama-8B
  - MAX_MODEL_LEN=4096
commands:
  # Fixed: removed redundant `pip install vllm` — the image already ships vLLM,
  # and reinstalling from PyPI could silently change the version at startup.
  - vllm serve $MODEL_ID
    --max-model-len $MAX_MODEL_LEN

port: 8000

# Register the model with dstack's OpenAI-compatible gateway
model:
  name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
  type: chat
  format: openai

resources:
  # Any GPU with at least 24GB of memory
  gpu: 24GB

0 comments on commit a453997

Please sign in to comment.