# proxy.yaml - LiteLLM proxy configuration

# Deployments served by the proxy. Each entry pairs a `model_name` with the
# `litellm_params` used for the upstream call. All entries below use the
# `openai/` provider prefix and share one credential:
# `os.environ/OPENAI_API_KEY` is resolved via os.getenv("OPENAI_API_KEY"),
# so the key itself never appears in this file.
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openai/gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY
      # Rate limit for this deployment, in requests per minute (rpm).
      rpm: 6
  # NOTE(review): gpt-3.5-turbo-instruct is a completions-style model. Confirm
  # that the `openai/` prefix routes it to the right endpoint (LiteLLM also
  # documents a `text-completion-openai/` prefix) - TODO verify.
  - model_name: gpt-3.5-turbo-instruct
    litellm_params:
      model: openai/gpt-3.5-turbo-instruct
      api_key: os.environ/OPENAI_API_KEY
  - model_name: gpt-3.5-turbo-16k
    litellm_params:
      model: openai/gpt-3.5-turbo-16k
      api_key: os.environ/OPENAI_API_KEY
  - model_name: gpt-4-turbo
    litellm_params:
      model: openai/gpt-4-turbo
      api_key: os.environ/OPENAI_API_KEY
  - model_name: gpt-4
    litellm_params:
      model: openai/gpt-4
      api_key: os.environ/OPENAI_API_KEY
      # Rate limit for this deployment, in requests per minute (rpm).
      rpm: 6
  - model_name: gpt-4o
    litellm_params:
      model: openai/gpt-4o
      api_key: os.environ/OPENAI_API_KEY
  - model_name: gpt-4o-mini
    litellm_params:
      model: openai/gpt-4o-mini
      api_key: os.environ/OPENAI_API_KEY

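# Reference list of model ids, kept for convenience.
# NOTE(review): looks like a pasted model-listing response - confirm the source.
# Ids with the extra indentation are the ones configured in model_list above.
#      "id": "gpt-4-1106-preview",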
#      "id": "text-embedding-3-small",
#      "id": "tts-1-1106",
#      "id": "dall-e-2",
#      "id": "tts-1",
#      "id": "gpt-4-32k-0314",
#      "id": "tts-1-hd-1106",
#      "id": "tts-1-hd",
#      "id": "dall-e-3",
#      "id": "whisper-1",
#        "id": "gpt-4",
#      "id": "gpt-4o-2024-05-13",
#        "id": "gpt-4-turbo",
#      "id": "gpt-4-turbo-2024-04-09",
#      "id": "gpt-4-0125-preview",
#        "id": "gpt-3.5-turbo",
#      "id": "gpt-4-turbo-preview",
#      "id": "gpt-3.5-turbo-0125",
#        "id": "gpt-4o-mini",
#      "id": "gpt-4o-mini-2024-07-18",
#      "id": "gpt-3.5-turbo-1106",
#        "id": "gpt-3.5-turbo-16k",
#      "id": "gpt-3.5-turbo-instruct-0914",
#      "id": "gpt-4-0613",
#        "id": "gpt-3.5-turbo-instruct",
#      "id": "gpt-4o-2024-08-06",
#      "id": "babbage-002",
#      "id": "davinci-002",
#      "id": "gpt-4-0314",
#      "id": "chatgpt-4o-latest",
#        "id": "gpt-4o",
#      "id": "text-embedding-3-large",
#      "id": "text-embedding-ada-002",

# Fallback (catch-all) entry. Its indentation makes it one more item of
# model_list, even though the comments around it sit at column 0.
# Works for all providers; it needs each provider's default credentials
# to be present in .env.
# NOTE(review): presumably matches any requested model name that is not listed
# explicitly - confirm against the LiteLLM wildcard-routing documentation.
# Keep "*" quoted: an unquoted * would be parsed as a YAML alias.
  - model_name: "*"
    litellm_params:
      model: "*"

# Redis connection parameters handed to the router.
# NOTE(review): redis_password is a plaintext secret stored in this file.
# Prefer supplying it from the environment, as the api_key entries in this
# file do with the `os.environ/NAME` form - first confirm that router_settings
# resolves that form, and export the variable in the deployment.
router_settings:
  redis_host: "llm-cache"
  redis_password: "genie_redis_password"
  redis_port: 6379

# Library-wide LiteLLM settings: logging callbacks, retry/timeout policy,
# model fallbacks and response caching.
litellm_settings:
  # Langfuse receives both failed and successful calls. The combined form
  #   callbacks: ["langfuse"]
  # is left disabled because it was marked as buggy.
  failure_callback:
    - "langfuse"
  success_callback:
    - "langfuse"
  langfuse_default_tags:
    - "cache_hit"
    - "cache_key"
    - "semantic-similarity"
    - "proxy_base_url"
  redact_user_api_key_info: false
  turn_off_message_logging: false

  # Number of times a failed call is retried on a model_name.
  num_retries: 2
  # Seconds before a call raises a Timeout error. Sets litellm.request_timeout.
  request_timeout: 120

  # If a gpt-4 call still fails after num_retries, fall back to gpt-4o-mini.
  fallbacks:
    - "gpt-4":
        - "gpt-4o-mini"
  # On a context-window error: gpt-4 -> gpt-4-turbo,
  # gpt-3.5-turbo -> gpt-3.5-turbo-16k.
  context_window_fallbacks:
    - "gpt-4":
        - "gpt-4-turbo"
    - "gpt-3.5-turbo":
        - "gpt-3.5-turbo-16k"

  # Turn response caching on; litellm defaults to using a redis cache.
  cache: true
  # Parameters for the redis cache.
  cache_params:
    type: redis
    namespace: "litellm_caching"
    # Cache lifetime in seconds. Alternative kept for reference (one day):
    # ttl: 86400
    ttl: 120
    # default_in_memory_ttl: Optional[float], default is None. Time in seconds.
    default_in_memory_ttl: 120
    # default_in_redis_ttl: Optional[float], default is None. Time in seconds.

#general_settings:
#    master_key: sk-1234 # [OPTIONAL] Only use this if you want to require all calls to contain this key (Authorization: Bearer sk-1234)
#    alerting: [ "slack" ] # [OPTIONAL] If you want Slack Alerts for Hanging LLM requests, Slow llm responses, Budget Alerts. Make sure to set `SLACK_WEBHOOK_URL` in your env