Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

exponential backoff to handle Play.ht 429s #438

Merged
merged 3 commits into from
Nov 30, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 51 additions & 36 deletions vocode/streaming/synthesizer/play_ht_synthesizer.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
import asyncio
import io
import logging
from typing import Optional

from aiohttp import ClientSession, ClientTimeout
from pydub import AudioSegment
import requests
from opentelemetry.context.context import Context

from vocode import getenv
from vocode.streaming.agent.bot_sentiment_analyser import BotSentiment
from vocode.streaming.models.message import BaseMessage
Expand All @@ -19,7 +14,6 @@
)
from vocode.streaming.utils.mp3_helper import decode_mp3


TTS_ENDPOINT = "https://play.ht/api/v2/tts/stream"


Expand All @@ -29,6 +23,8 @@ def __init__(
synthesizer_config: PlayHtSynthesizerConfig,
logger: Optional[logging.Logger] = None,
aiohttp_session: Optional[ClientSession] = None,
max_backoff_retries=3,
backoff_retry_delay=2,
):
super().__init__(synthesizer_config, aiohttp_session)
self.synthesizer_config = synthesizer_config
Expand All @@ -40,6 +36,8 @@ def __init__(
)
self.words_per_minute = 150
self.experimental_streaming = synthesizer_config.experimental_streaming
self.max_backoff_retries = max_backoff_retries
self.backoff_retry_delay = backoff_retry_delay

async def create_speech(
self,
Expand All @@ -48,12 +46,13 @@ async def create_speech(
bot_sentiment: Optional[BotSentiment] = None,
) -> SynthesisResult:
headers = {
"Authorization": f"Bearer {self.api_key}",
"X-User-ID": self.user_id,
"AUTHORIZATION": f"Bearer {self.api_key}",
"X-USER-ID": self.user_id,
"Accept": "audio/mpeg",
"Content-Type": "application/json",
}
body = {
"quality": "draft",
"voice": self.synthesizer_config.voice_id,
"text": message.text,
"sample_rate": self.synthesizer_config.sampling_rate,
Expand All @@ -69,33 +68,49 @@ async def create_speech(
f"synthesizer.{SynthesizerType.PLAY_HT.value.split('_', 1)[-1]}.create_total",
)

response = await self.aiohttp_session.post(
TTS_ENDPOINT, headers=headers, json=body, timeout=ClientTimeout(total=15)
)
if not response.ok:
raise Exception(f"Play.ht API error status code {response.status}")
if self.experimental_streaming:
return SynthesisResult(
self.experimental_mp3_streaming_output_generator(
response, chunk_size, create_speech_span
), # should be wav
lambda seconds: self.get_message_cutoff_from_voice_speed(
message, seconds, self.words_per_minute
),
)
else:
read_response = await response.read()
create_speech_span.end()
convert_span = tracer.start_span(
f"synthesizer.{SynthesizerType.PLAY_HT.value.split('_', 1)[-1]}.convert",
)
output_bytes_io = decode_mp3(read_response)
backoff_retry_delay = self.backoff_retry_delay
max_backoff_retries = self.max_backoff_retries

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
for attempt in range(max_backoff_retries):
response = await self.aiohttp_session.post(
TTS_ENDPOINT,
headers=headers,
json=body,
timeout=ClientTimeout(total=15),
)
convert_span.end()
return result

if response.status == 429 and attempt < max_backoff_retries - 1:
await asyncio.sleep(backoff_retry_delay)
backoff_retry_delay *= 2 # Exponentially increase delay
continue

if not response.ok:
raise Exception(f"Play.ht API error status code {response.status}")

if self.experimental_streaming:
return SynthesisResult(
self.experimental_mp3_streaming_output_generator(
response, chunk_size, create_speech_span
),
lambda seconds: self.get_message_cutoff_from_voice_speed(
message, seconds, self.words_per_minute
),
)
else:
read_response = await response.read()
create_speech_span.end()
convert_span = tracer.start_span(
f"synthesizer.{SynthesizerType.PLAY_HT.value.split('_', 1)[-1]}.convert",
)
output_bytes_io = decode_mp3(read_response)

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
)
convert_span.end()
return result

raise Exception("Max retries reached for Play.ht API")
Loading