added live sdk
mmabrouk committed Dec 13, 2024
1 parent 99e23b3 commit 4f145e2
Showing 15 changed files with 486 additions and 0 deletions.
10 changes: 10 additions & 0 deletions docker-compose.yml
@@ -214,6 +214,16 @@ services:
      file: ./services/completion-serverless-sdk/docker-compose.yml
      service: completion-serverless-sdk

  completion-live-sdk:
    extends:
      file: ./services/completion-live-sdk/docker-compose.yml
      service: completion-live-sdk

  chat-live-sdk:
    extends:
      file: ./services/chat-live-sdk/docker-compose.yml
      service: chat-live-sdk

networks:
  agenta-network:
    name: agenta-network
18 changes: 18 additions & 0 deletions services/chat-live-sdk/Dockerfile
@@ -0,0 +1,18 @@
FROM python:3.10-slim

ARG ROOT_PATH=/
ENV ROOT_PATH=${ROOT_PATH}

WORKDIR /app

COPY . .

RUN pip install --upgrade pip \
&& pip install --no-cache-dir agenta openai python-dotenv uvicorn "litellm>=1.0,<2.0" google-generativeai

# Add agenta-cli to PYTHONPATH so it can find the local agenta package
ENV PYTHONPATH=/agenta-cli:$PYTHONPATH

EXPOSE 80

CMD ["./entrypoint.sh"]
16 changes: 16 additions & 0 deletions services/chat-live-sdk/Dockerfile.prerelease
@@ -0,0 +1,16 @@
FROM python:3.10-slim

ARG ROOT_PATH=/
ENV ROOT_PATH=${ROOT_PATH}

WORKDIR /app

COPY . .

RUN pip install --upgrade pip \
&& pip install --no-cache-dir openai python-dotenv uvicorn "litellm>=1.0,<2.0" google-generativeai \
&& pip install --no-cache-dir --pre agenta

EXPOSE 80

CMD ["./entrypoint.sh"]
52 changes: 52 additions & 0 deletions services/chat-live-sdk/_app.py
@@ -0,0 +1,52 @@
from typing import Dict, Any, List
import agenta as ag
import litellm
from supported_llm_models import get_all_supported_llm_models

litellm.drop_params = True
litellm.callbacks = [ag.callbacks.litellm_handler()]

SYSTEM_PROMPT = "You have expertise in offering technical ideas to startups."

ag.init()
ag.config.default(
    temperature=ag.FloatParam(0.2),
    model=ag.GroupedMultipleChoiceParam(
        default="gpt-3.5-turbo", choices=get_all_supported_llm_models()
    ),
    max_tokens=ag.IntParam(-1, -1, 4000),
    prompt_system=ag.TextParam(SYSTEM_PROMPT),
)


async def llm_call(messages: List[Dict[str, Any]], max_tokens: int):
    chat_completion = await litellm.acompletion(
        model=ag.config.model,
        messages=messages,
        temperature=ag.config.temperature,
        max_tokens=max_tokens,
    )
    token_usage = chat_completion.usage.dict()
    return {
        "usage": token_usage,
        "message": chat_completion.choices[0].message.content,
        "cost": litellm.cost_calculator.completion_cost(
            completion_response=chat_completion, model=ag.config.model
        ),
    }


@ag.entrypoint
@ag.instrument()
async def chat(inputs: ag.MessagesInput = ag.MessagesInput()) -> Dict[str, Any]:
    messages = [{"role": "system", "content": ag.config.prompt_system}] + inputs
    max_tokens = ag.config.max_tokens if ag.config.max_tokens != -1 else None
    response = await llm_call(
        messages=messages,
        max_tokens=max_tokens,
    )
    return {
        "message": response["message"],
        "usage": response.get("usage", None),
        "cost": response.get("cost", None),
    }
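
A minimal sketch of exercising the chat service once the stack is up, assuming Traefik routes /chat-live-sdk/ to this container on port 80 as labeled in the docker-compose file below. The /generate route and the "inputs" payload field are assumptions based on the SDK's entrypoint convention, not confirmed by this diff; adjust to your agenta version:

import requests  # assumes requests is available in the calling environment

resp = requests.post(
    "http://localhost/chat-live-sdk/generate",  # hypothetical route
    json={"inputs": [{"role": "user", "content": "Suggest a technical idea for a logistics startup."}]},
    timeout=60,
)
resp.raise_for_status()
print(resp.json())  # expected keys per chat(): message, usage, cost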
22 changes: 22 additions & 0 deletions services/chat-live-sdk/docker-compose.yml
@@ -0,0 +1,22 @@
services:
  chat-live-sdk:
    build: .
    volumes:
      - .:/app
      - ../../agenta-cli:/agenta-cli
    environment:
      - AGENTA_UNAUTHORIZED_EXECUTION_ALLOWED=True
    networks:
      - agenta-network
    labels:
      - "traefik.http.routers.chat-live-sdk.rule=PathPrefix(`/chat-live-sdk/`)"
      - "traefik.http.routers.chat-live-sdk.entrypoints=web"
      - "traefik.http.middlewares.chat-live-sdk-strip.stripprefix.prefixes=/chat-live-sdk"
      - "traefik.http.middlewares.chat-live-sdk-strip.stripprefix.forceslash=true"
      - "traefik.http.routers.chat-live-sdk.middlewares=chat-live-sdk-strip"
      - "traefik.http.services.chat-live-sdk.loadbalancer.server.port=80"
      - "traefik.http.routers.chat-live-sdk.service=chat-live-sdk"

networks:
  agenta-network:
    external: true
9 changes: 9 additions & 0 deletions services/chat-live-sdk/entrypoint.sh
@@ -0,0 +1,9 @@
#!/bin/bash

if [ -f .env ]; then
    source .env
fi

# main.py starts uvicorn with reload enabled, watching both the app and agenta-cli directories
exec python main.py
8 changes: 8 additions & 0 deletions services/chat-live-sdk/main.py
@@ -0,0 +1,8 @@
from uvicorn import run
import agenta
import _app  # Importing _app registers the routes with the FastAPI application


if __name__ == "__main__":
    run("agenta:app", host="0.0.0.0", port=80, reload=True, reload_dirs=[".", "/agenta-cli"])
91 changes: 91 additions & 0 deletions services/chat-live-sdk/supported_llm_models.py
@@ -0,0 +1,91 @@
supported_llm_models = {
    "Mistral AI": [
        "mistral/mistral-tiny",
        "mistral/mistral-small",
        "mistral/mistral-medium",
        "mistral/mistral-large-latest",
    ],
    "Open AI": [
        "gpt-3.5-turbo-1106",
        "gpt-3.5-turbo",
        "gpt-4",
        "gpt-4o",
        "gpt-4-1106-preview",
    ],
    "Gemini": [
        "gemini/gemini-1.5-pro-latest",
    ],
    "Cohere": [
        "cohere/command-light",
        "cohere/command-r-plus",
        "cohere/command-nightly",
    ],
    "Anthropic": [
        "anthropic/claude-2.1",
        "anthropic/claude-2",
        "anthropic/claude-instant-1.2",
        "anthropic/claude-instant-1",
    ],
    "Anyscale": [
        "anyscale/meta-llama/Llama-2-13b-chat-hf",
        "anyscale/meta-llama/Llama-2-70b-chat-hf",
    ],
    "Perplexity AI": [
        "perplexity/pplx-7b-chat",
        "perplexity/pplx-70b-chat",
        "perplexity/pplx-7b-online",
        "perplexity/pplx-70b-online",
    ],
    "DeepInfra": [
        "deepinfra/meta-llama/Llama-2-70b-chat-hf",
        "deepinfra/meta-llama/Llama-2-13b-chat-hf",
        "deepinfra/codellama/CodeLlama-34b-Instruct-hf",
        "deepinfra/mistralai/Mistral-7B-Instruct-v0.1",
        "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1",
    ],
    "Together AI": [
        "together_ai/togethercomputer/llama-2-70b-chat",
        "together_ai/togethercomputer/llama-2-70b",
        "together_ai/togethercomputer/LLaMA-2-7B-32K",
        "together_ai/togethercomputer/Llama-2-7B-32K-Instruct",
        "together_ai/togethercomputer/llama-2-7b",
        "together_ai/togethercomputer/alpaca-7b",
        "together_ai/togethercomputer/CodeLlama-34b-Instruct",
        "together_ai/togethercomputer/CodeLlama-34b-Python",
        "together_ai/WizardLM/WizardCoder-Python-34B-V1.0",
        "together_ai/NousResearch/Nous-Hermes-Llama2-13b",
        "together_ai/Austism/chronos-hermes-13b",
    ],
    "Aleph Alpha": [
        "luminous-base",
        "luminous-base-control",
        "luminous-extended-control",
        "luminous-supreme",
    ],
    "OpenRouter": [
        "openrouter/openai/gpt-3.5-turbo",
        "openrouter/openai/gpt-3.5-turbo-16k",
        "openrouter/anthropic/claude-instant-v1",
        "openrouter/google/palm-2-chat-bison",
        "openrouter/google/palm-2-codechat-bison",
        "openrouter/meta-llama/llama-2-13b-chat",
        "openrouter/meta-llama/llama-2-70b-chat",
    ],
    "Groq": [
        "groq/llama3-8b-8192",
        "groq/llama3-70b-8192",
        "groq/llama2-70b-4096",
        "groq/mixtral-8x7b-32768",
        "groq/gemma-7b-it",
    ],
}


def get_all_supported_llm_models():
    """
    Returns the supported LLM models, grouped by provider.

    Returns:
        Dict[str, List[str]]: A mapping of provider name to a list of model identifiers.
    """
    return supported_llm_models
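
For illustration, one way a caller might flatten the grouped mapping into a single list of model identifiers (this helper is not part of the commit):

from supported_llm_models import get_all_supported_llm_models

models_by_provider = get_all_supported_llm_models()
all_models = [model for models in models_by_provider.values() for model in models]
print(f"{len(all_models)} models across {len(models_by_provider)} providers")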
18 changes: 18 additions & 0 deletions services/completion-live-sdk/Dockerfile
@@ -0,0 +1,18 @@
FROM python:3.10-slim

ARG ROOT_PATH=/
ENV ROOT_PATH=${ROOT_PATH}

WORKDIR /app

COPY . .

RUN pip install --upgrade pip \
&& pip install --no-cache-dir agenta openai python-dotenv uvicorn "litellm>=1.0,<2.0" google-generativeai

# Add agenta-cli to PYTHONPATH so it can find the local agenta package
ENV PYTHONPATH=/agenta-cli:$PYTHONPATH

EXPOSE 80

CMD ["./entrypoint.sh"]
16 changes: 16 additions & 0 deletions services/completion-live-sdk/Dockerfile.prerelease
@@ -0,0 +1,16 @@
FROM python:3.10-slim

ARG ROOT_PATH=/
ENV ROOT_PATH=${ROOT_PATH}

WORKDIR /app

COPY . .

RUN pip install --upgrade pip \
&& pip install --no-cache-dir openai python-dotenv uvicorn "litellm>=1.0,<2.0" google-generativeai \
&& pip install --no-cache-dir --pre agenta

EXPOSE 80

CMD ["./entrypoint.sh"]
96 changes: 96 additions & 0 deletions services/completion-live-sdk/_app.py
@@ -0,0 +1,96 @@
import agenta as ag
import litellm
from supported_llm_models import get_all_supported_llm_models

litellm.drop_params = True


prompts = {
    "system_prompt": "You are an expert in geography.",
    "user_prompt": """What is the capital of {country}?""",
}

GPT_FORMAT_RESPONSE = ["gpt-3.5-turbo-1106", "gpt-4-1106-preview"]


ag.init()
ag.config.default(
    temperature=ag.FloatParam(default=1, minval=0.0, maxval=2.0),
    model=ag.GroupedMultipleChoiceParam(
        default="gpt-3.5-turbo", choices=get_all_supported_llm_models()
    ),
    max_tokens=ag.IntParam(-1, -1, 4000),
    prompt_system=ag.TextParam(prompts["system_prompt"]),
    prompt_user=ag.TextParam(prompts["user_prompt"]),
    top_p=ag.FloatParam(1),
    frequency_penalty=ag.FloatParam(default=0.0, minval=-2.0, maxval=2.0),
    presence_penalty=ag.FloatParam(default=0.0, minval=-2.0, maxval=2.0),
    force_json=ag.BinaryParam(False),
)


@ag.instrument(spankind="llm")
async def llm_call(prompt_system: str, prompt_user: str):
    response_format = (
        {"type": "json_object"}
        if ag.config.force_json and ag.config.model in GPT_FORMAT_RESPONSE
        else {"type": "text"}
    )
    max_tokens = ag.config.max_tokens if ag.config.max_tokens != -1 else None

    # Include frequency_penalty and presence_penalty only for models known to support them
    completion_params = {}
    if ag.config.model in GPT_FORMAT_RESPONSE:
        completion_params["frequency_penalty"] = ag.config.frequency_penalty
        completion_params["presence_penalty"] = ag.config.presence_penalty

    response = await litellm.acompletion(
        **{
            "model": ag.config.model,
            "messages": [
                {"content": prompt_system, "role": "system"},
                {"content": prompt_user, "role": "user"},
            ],
            "temperature": ag.config.temperature,
            "max_tokens": max_tokens,
            "top_p": ag.config.top_p,
            "response_format": response_format,
            **completion_params,
        }
    )
    token_usage = response.usage.dict()
    return {
        "message": response.choices[0].message.content,
        "usage": token_usage,
        "cost": litellm.cost_calculator.completion_cost(
            completion_response=response, model=ag.config.model
        ),
    }


@ag.entrypoint
@ag.instrument()
async def generate(
    inputs: ag.DictInput = ag.DictInput(default_keys=["country"]),
):
    try:
        prompt_user = ag.config.prompt_user.format(**inputs)
    except Exception:
        prompt_user = ag.config.prompt_user
    try:
        prompt_system = ag.config.prompt_system.format(**inputs)
    except Exception:
        prompt_system = ag.config.prompt_system

    # Reject the JSON response format for models that do not support it
    if ag.config.force_json and ag.config.model not in GPT_FORMAT_RESPONSE:
        raise ValueError(
            "Model {} does not support JSON response format".format(ag.config.model)
        )

    response = await llm_call(prompt_system=prompt_system, prompt_user=prompt_user)
    return {
        "message": response["message"],
        "usage": response.get("usage", None),
        "cost": response.get("cost", None),
    }
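
As with the chat service, a minimal sketch of calling this endpoint, assuming Traefik routes /completion-live-sdk/ to the container; the /generate route and the inputs dict keyed by "country" (mirroring DictInput's default_keys) are assumptions, not confirmed by this diff:

import requests

resp = requests.post(
    "http://localhost/completion-live-sdk/generate",  # hypothetical route
    json={"inputs": {"country": "France"}},
    timeout=60,
)
resp.raise_for_status()
print(resp.json())  # expected keys per generate(): message, usage, cost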
22 changes: 22 additions & 0 deletions services/completion-live-sdk/docker-compose.yml
@@ -0,0 +1,22 @@
services:
  completion-live-sdk:
    build: .
    volumes:
      - .:/app
      - ../../agenta-cli:/agenta-cli
    environment:
      - AGENTA_UNAUTHORIZED_EXECUTION_ALLOWED=True
    networks:
      - agenta-network
    labels:
      - "traefik.http.routers.completion-live-sdk.rule=PathPrefix(`/completion-live-sdk/`)"
      - "traefik.http.routers.completion-live-sdk.entrypoints=web"
      - "traefik.http.middlewares.completion-live-sdk-strip.stripprefix.prefixes=/completion-live-sdk"
      - "traefik.http.middlewares.completion-live-sdk-strip.stripprefix.forceslash=true"
      - "traefik.http.routers.completion-live-sdk.middlewares=completion-live-sdk-strip"
      - "traefik.http.services.completion-live-sdk.loadbalancer.server.port=80"
      - "traefik.http.routers.completion-live-sdk.service=completion-live-sdk"

networks:
  agenta-network:
    external: true