
Commit

[FEAT][Llava]

Kye committed Apr 24, 2024
1 parent c0de2d8 commit 526da75

Showing 14 changed files with 161 additions and 61 deletions.
Binary file added scripts/.DS_Store
Binary file not shown.
102 changes: 102 additions & 0 deletions scripts/terraform_to_sky_serve/main.tf
@@ -0,0 +1,102 @@
provider "aws" {
region = "us-east-1"
}

# Create a new VPC
resource "aws_vpc" "swarms_vpc" {
cidr_block = "10.0.0.0/16"
enable_dns_support = true
enable_dns_hostnames = true

tags = {
Name = "SwarmsVPC"
}
}

# Create an Internet Gateway for the VPC
resource "aws_internet_gateway" "swarms_igw" {
vpc_id = aws_vpc.swarms_vpc.id

tags = {
Name = "SwarmsIGW"
}
}

# Create a subnet within the VPC
resource "aws_subnet" "swarms_subnet" {
vpc_id = aws_vpc.swarms_vpc.id
cidr_block = "10.0.1.0/24"
availability_zone = "us-east-1a"

map_public_ip_on_launch = true

tags = {
Name = "SwarmsSubnet"
}
}

# CloudWatch Log Group
resource "aws_cloudwatch_log_group" "swarms_log_group" {
name = "/ec2/swarms/logs"
retention_in_days = 14
}

# CloudWatch Log Stream
resource "aws_cloudwatch_log_stream" "swarms_log_stream" {
name = "InstanceLogStream"
log_group_name = aws_cloudwatch_log_group.swarms_log_group.name
}


# Create a security group in the VPC
resource "aws_security_group" "swarms_sg" {
name = "swarms-sg"
description = "Security Group for EC2 Instances in Swarms VPC"
vpc_id = aws_vpc.swarms_vpc.id

ingress {
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}

egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}

tags = {
Name = "swarms-sg"
}
}

# Create a Route 53 hosted zone
resource "aws_route53_zone" "swarms_zone" {
name = "api.swarms.world"
}

# EC2 Instance using the created resources
resource "aws_instance" "swarms_ec2" {
ami = "ami-0ac1f653c5b6af751" # Specify the correct AMI ID
instance_type = "m6i.xlarge"
subnet_id = aws_subnet.swarms_subnet.id
key_name = "your-key-name" # Update with your actual key name
vpc_security_group_ids = [aws_security_group.swarms_sg.id]
associate_public_ip_address = true

tags = {
Name = "sky-sky-serve-controller-3f665020-3f66-head"
}
}

# Route 53 Record to point to the EC2 instance
resource "aws_route53_record" "swarms_record" {
zone_id = aws_route53_zone.swarms_zone.zone_id
name = "api.swarms.world"
type = "A"
ttl = "300"
records = [aws_instance.swarms_ec2.public_ip]
}
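
Note: as written, this VPC has an Internet Gateway but no route table sending 0.0.0.0/0 through it, so SwarmsSubnet is not actually public despite map_public_ip_on_launch. A minimal sketch of the missing plumbing in boto3 terms (the resource IDs are placeholders; in Terraform this would be an aws_route_table plus an aws_route_table_association):

import boto3

# Placeholders: in practice, read these from `terraform output`.
vpc_id, igw_id, subnet_id = "vpc-0123", "igw-0123", "subnet-0123"

ec2 = boto3.client("ec2", region_name="us-east-1")

# Create a route table in the VPC, default all traffic to the IGW,
# and attach it to the subnet.
rt = ec2.create_route_table(VpcId=vpc_id)["RouteTable"]
ec2.create_route(
    RouteTableId=rt["RouteTableId"],
    DestinationCidrBlock="0.0.0.0/0",
    GatewayId=igw_id,
)
ec2.associate_route_table(RouteTableId=rt["RouteTableId"], SubnetId=subnet_id)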
1 change: 0 additions & 1 deletion servers/cogvlm/cogvlm.py
@@ -338,7 +338,6 @@ def generate_stream_cogvlm(
    query, history, image_list = process_history_and_images(messages)
    logger.debug(f"==== request ====\n{query}")


    input_by_model = model.build_conversation_input_ids(
        tokenizer, query=query, history=history, images=[image_list[-1]]
    )
4 changes: 2 additions & 2 deletions servers/cogvlm/openai_api_example.py
@@ -2,10 +2,10 @@

# Set the API key
client = openai.OpenAI(

    # clusters
    # base_url="https://44.203.144.168:30001/v1", api_key="sk-1g3Z3y2c1f2z3d4b5p6w7c8b9v0n1m2"
    base_url="http://40.87.83.103:30001/v1", api_key="sk-1g3Z3y2c1f2z3d4b5p6w7c8b9v0n1m2"
    base_url="http://40.87.83.103:30001/v1",
    api_key="sk-1g3Z3y2c1f2z3d4b5p6w7c8b9v0n1m2"
    # base_url="https://18.189.185.191:8000/v1", api_key="sk-1g3Z3y2c1f2z3d4b5p6w7c8b9v0n1m2"
)
# Create a client object to interact with the OpenAI API
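
The rest of the file is truncated above; a minimal sketch of the kind of request it sends with this client (the model name is a placeholder — query /v1/models on the cluster for the real one):

response = client.chat.completions.create(
    model="cogvlm-chat-17b",  # placeholder model name
    messages=[{"role": "user", "content": "Describe this service in one sentence."}],
)
print(response.choices[0].message.content)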
2 changes: 1 addition & 1 deletion servers/cogvlm/sky_serve.yaml
@@ -82,7 +82,7 @@ envs:

# Fields below describe each replica.
resources:
  accelerators: {L4:8, A10g:8, A100:4, A100:8, A100-80GB:2}
  accelerators: {L4:8, A10g:8, A100:4, A100:8, A100-80GB:2, V100:1}  # V100:4, V100:8, H100:1, T4:1, A10:1
  # cpus: 32+
  # memory: 512+
  # use_spot: True
8 changes: 5 additions & 3 deletions servers/llama3/sky_serve.yaml
@@ -1,5 +1,5 @@
# Serving Meta Llama-3 on your own infra.

# Serving Meta Llama-3 on your own infra.
#
# Usage:
#
# HF_TOKEN=xxx sky launch llama3.yaml -c llama3 --env HF_TOKEN
@@ -10,7 +10,6 @@
#
# # We need to manually specify the stop_token_ids to make sure the model finish
# # on <|eot_id|>.

# curl http://$ENDPOINT/v1/chat/completions \
# -H "Content-Type: application/json" \
# -d '{
@@ -56,10 +55,12 @@
# ]
# }'


envs:
  # MODEL_NAME: meta-llama/Meta-Llama-3-70B-Instruct
  MODEL_NAME: meta-llama/Meta-Llama-3-8B-Instruct
  HF_TOKEN: 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  # Change to your own huggingface token, or use --env to pass.
  HF_HUB_ENABLE_HF_TRANSFER: True

service:
  replicas: 2
Expand Down Expand Up @@ -95,6 +96,7 @@ setup: |
  # Install Gradio for web UI.
  pip install gradio openai
  pip install flash-attn==2.5.7
  pip install hf_transfer

run: |
  # Serve VLM
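
The comments in this file stress that stop_token_ids must be set so generation halts on <|eot_id|>. A hedged Python equivalent of the curl example (128009 is Llama-3's <|eot_id|> token id — verify against your tokenizer; the extra_body passthrough assumes a vLLM-style server):

import openai

endpoint = "1.2.3.4:8000"  # placeholder; get the real value from `sky serve status`
client = openai.OpenAI(base_url=f"http://{endpoint}/v1", api_key="EMPTY")
resp = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    messages=[{"role": "user", "content": "Who are you?"}],
    extra_body={"stop_token_ids": [128009]},  # stop on <|eot_id|>
)
print(resp.choices[0].message.content)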
15 changes: 12 additions & 3 deletions servers/llava/llava_request.py
@@ -1,13 +1,22 @@
import os

from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

openai_api_base = os.getenv("OPENAI_API_BASE", "http://34.201.45.48:30002/v1")
model = os.getenv("MODEL", "llava")

client = OpenAI(
    api_key=openai_api_key,
    api_key="sk-23232323",
    base_url=openai_api_base,
    timeout=500
)
# Note that this model expects the image to come before the main text
chat_response = client.chat.completions.create(
    model="llava-hf/llava-1.5-7b-hf",
    model=model,
    messages=[{
        "role": "user",
        "content": [
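
The rest of the request body is truncated above. A variant sketch for local images, sending a base64 data URL instead of a remote URL (this assumes the server accepts data URLs, as vLLM's OpenAI-compatible server does; client and model are the objects configured in this file):

import base64

with open("photo.jpg", "rb") as f:  # hypothetical local image
    b64 = base64.b64encode(f.read()).decode()

chat_response = client.chat.completions.create(
    model=model,
    messages=[{
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
            {"type": "text", "text": "What's in this image?"},
        ],
    }],
)
print(chat_response.choices[0].message.content)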
21 changes: 13 additions & 8 deletions servers/lm_deploy/api.py
@@ -1,22 +1,27 @@
from lmdeploy import pipeline, TurbomindEngineConfig

engine_config = TurbomindEngineConfig(model_format='awq')
engine_config = TurbomindEngineConfig(model_format="awq")
pipe = pipeline("./internlm2-chat-7b-4bit", backend_config=engine_config)
response = pipe(["Hi, pls intro yourself", "Shanghai is"])
print(response)

# inference with models from lmdeploy space
from lmdeploy import pipeline, TurbomindEngineConfig
pipe = pipeline("lmdeploy/llama2-chat-70b-4bit",
backend_config=TurbomindEngineConfig(model_format='awq', tp=4))

pipe = pipeline(
"lmdeploy/llama2-chat-70b-4bit",
backend_config=TurbomindEngineConfig(model_format="awq", tp=4),
)
response = pipe(["Hi, pls intro yourself", "Shanghai is"])
print(response)

# inference with models from thebloke space
from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
pipe = pipeline("TheBloke/LLaMA2-13B-Tiefighter-AWQ",
backend_config=TurbomindEngineConfig(model_format='awq'),
chat_template_config=ChatTemplateConfig(model_name='llama2')
)

pipe = pipeline(
"TheBloke/LLaMA2-13B-Tiefighter-AWQ",
backend_config=TurbomindEngineConfig(model_format="awq"),
chat_template_config=ChatTemplateConfig(model_name="llama2"),
)
response = pipe(["Hi, pls intro yourself", "Shanghai is"])
print(response)
print(response)
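
None of the calls above set sampling parameters; lmdeploy exposes them through GenerationConfig. A sketch, assuming the same local 4-bit model path:

from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline

pipe = pipeline("./internlm2-chat-7b-4bit",
                backend_config=TurbomindEngineConfig(model_format="awq"))
gen_config = GenerationConfig(max_new_tokens=256, top_p=0.8, temperature=0.7)
print(pipe(["Hi, pls intro yourself"], gen_config=gen_config))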
4 changes: 2 additions & 2 deletions servers/lm_deploy/openai_curl.sh
@@ -9,10 +9,10 @@ fi
MODEL=$MODEL

# First run curl on v1/models to get available models
curl http://$OPENAI_API_BASE:8080/v1/models
curl http://34.201.45.48:30002/v1/models

# Run curl on v1/chat/completions with the specified model
curl http://$OPENAI_API_BASE:8080/v1/chat/completions \
curl http://34.201.45.48:30002/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "'$MODEL'",
26 changes: 14 additions & 12 deletions servers/lm_deploy/openai_vision_request.py
@@ -13,17 +13,19 @@
# Note that this model expects the image to come before the main text
chat_response = client.chat.completions.create(
    model=model,
    messages=[{
        "role": "user",
        "content": [
            {
                "type": "image_url",
                "image_url": {
                    "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
                },
            },
            {"type": "text", "text": "What's in this image?"},
        ],
    }],
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
                    },
                },
                {"type": "text", "text": "What's in this image?"},
            ],
        }
    ],
)
print("Chat response:", chat_response)
print("Chat response:", chat_response)
10 changes: 6 additions & 4 deletions servers/text_to_video/sky_serve.yaml
@@ -22,7 +22,7 @@ service:

# Fields below describe each replica.
resources:
  accelerators: {L4:8, A10g:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
  accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB}  # We can use cheaper accelerators for 8B model.
  # cpus: 32+
  # memory: 512+
  # use_spot: True
@@ -31,10 +31,12 @@ resources:
  ports: 8000  # Expose to internet traffic.
  # spot_recovery: none

# workdir: ~/swarms-cloud/servers/cogvlm

setup: |
  docker build -t ttv .
  git clone https://github.com/kyegomez/swarms-cloud.git && \
  cd swarms-cloud/servers/text_to_video && \
  python3 -m pip install -r requirements.txt

run: |
  docker run --gpus all ttv
  python3 text_to_video.py
2 changes: 1 addition & 1 deletion servers/text_to_video/text_to_video.py
@@ -105,7 +105,7 @@ def text_to_video(
        return None


@app.post("/v1/chat/completions", response_model=TextToVideoResponse)
@app.post("/v1/video/completions", response_model=TextToVideoResponse)
async def create_chat_completion(
    request: TextToVideoRequest,  # token: str = Depends(authenticate_user)
):
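
Renaming the route means clients still posting to /v1/chat/completions will now 404. A hypothetical call against the new path (the TextToVideoRequest schema is defined elsewhere in the file, so the payload field below is a guess):

import requests

resp = requests.post(
    "http://localhost:8000/v1/video/completions",
    json={"prompt": "a drone shot of a rocky coastline at sunset"},  # guessed schema
    timeout=600,
)
print(resp.json())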
22 changes: 0 additions & 22 deletions swarms_cloud/methods.py

This file was deleted.

5 changes: 3 additions & 2 deletions swarms_cloud/stripe_utils.py
@@ -2,8 +2,7 @@

import stripe
from pydantic import BaseModel

stripe.api_key = "your_stripe_api_key"
import os


class StripeInterface(BaseModel):
@@ -13,6 +12,8 @@ class StripeInterface(BaseModel):


def bill_customer(customer_id: str, amount: float, description: str):
    stripe.api_key = os.getenv("STRIPE_API_KEY")

    try:
        stripe.Charge.create(
            customer=customer_id,
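
With the hardcoded key gone, callers must export STRIPE_API_KEY before bill_customer runs. A minimal usage sketch (the customer id is a placeholder, and Stripe's Charge API takes amounts in the currency's smallest unit):

import os

os.environ.setdefault("STRIPE_API_KEY", "sk_test_...")  # placeholder; set in the shell in practice

from swarms_cloud.stripe_utils import bill_customer

bill_customer(customer_id="cus_123", amount=500, description="Swarms API usage")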
