diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index f4048b8..c3bac3e 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -35,28 +35,26 @@ "vscode": { "extensions": [ "ms-python.python", + "ms-python.vscode-pylance", "ms-python.debugpy", "KevinRose.vsc-python-indent", "njpwerner.autodocstring", "GitHub.copilot", "GitHub.copilot-chat", - "ZainChen.json" + "ZainChen.json", + "VisualStudioExptTeam.vscodeintellicode", + "VisualStudioExptTeam.vscodeintellicode-completions" ] }, }, - // Features to add to the dev container. More info: https://containers.dev/features. // "features": {}, - // Use 'forwardPorts' to make a list of ports inside the container available locally. // "forwardPorts": [], - // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "sudo apt-get update && sudo DEBIAN_FRONTEND=noninteractive apt-get -y install ffmpeg && pip3 install --user -r requirements.txt", - + "postCreateCommand": "echo 'export PATH=$PATH:/home/vscode/.local/bin' >> /home/vscode/.bashrc && sudo apt-get update && sudo DEBIAN_FRONTEND=noninteractive apt-get -y install ffmpeg && pip3 install --user -r requirements.txt", // Configure tool-specific properties. // "customizations": {}, - // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 
// "remoteUser": "root" -} +} \ No newline at end of file diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..3fa9780 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,5 @@ +{ + "recommendations": [ + "ms-python.vscode-pylance" + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..b22b768 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "python.analysis.typeCheckingMode": "off", + "python.analysis.autoSearchPaths": true, + "python.analysis.useLibraryCodeForTypes": true, + "python.analysis.logLevel": "Trace" // For debugging purposes +} \ No newline at end of file diff --git a/README.md b/README.md index 4fd6616..e1cf333 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# live-news-break +# AI Live News Reader for Radio Stations The `live-news-break` repository contains a news generation script that fetches, processes, and converts news articles into an audio news broadcast. This guide will help you understand how to set up, configure, and run the news generation process using the script provided. @@ -20,12 +20,11 @@ Listen to the demo here: [https://audio.com/troy-8/audio/troykelly-live-news-bre To test with docker, see the example command below. -This will create a completly dry read, as we can't distribute the audio files with the package - you will have to create or find your own. +This will create a completely dry read, as we can't distribute the audio files with the package - you will have to create or find your own. 
```bash docker run --rm -e OPENAI_API_KEY=SETKEYHERE -e NEWS_READER_OUTPUT_DIR=/mnt/audio -v "${PWD}:/mnt/audio" ghcr.io/troykelly/live-news-break:edge ``` - *Make sure to set your correct OPENAI_API_KEY* ## Prerequisites @@ -59,13 +58,14 @@ Here's an example configuration, see the docker compose file for an exhaustive l ```dotenv OPENAI_API_KEY=sk-proj-KEYKEYKEY -OPENWEATHER_API_KEY=KEYKEYKEY +ELEVENLABS_API_KEY=KEYKEYKEY NEWS_READER_STATION_NAME=News Update Radio NEWS_READER_READER_NAME=OpenAI Shimmer NEWS_READER_STATION_CITY=Sydney NEWS_READER_STATION_COUNTRY=Australia NEWS_READER_TTS_VOICE=shimmer -NEWS_READER_TTS_QUALITY=tts-1-hd +NEWS_READER_TTS_MODEL=tts-1-hd +NEWS_READER_TTS_PROVIDER=openai NEWS_READER_AUDIO_INTRO=audio/intro.wav NEWS_READER_AUDIO_OUTRO=audio/outro.wav NEWS_READER_AUDIO_FIRST=audio/first.wav @@ -81,202 +81,170 @@ NEWS_READER_GAIN_BED=-15 NEWS_READER_FADEIN_BED=0 NEWS_READER_FADEOUT_BED=500 NEWS_READER_BOM_PRODUCT_ID=IDN10064 +OPENWEATHER_API_KEY=KEYKEYKEY OPENWEATHER_LAT=-33.8688 OPENWEATHER_LON=151.2093 ``` -Ensure to replace placeholder values, especially the `OPENAI_API_KEY` and `OPENWEATHER_API_KEY`, with your actual API keys. +Be sure to replace the placeholder values, especially `OPENAI_API_KEY` and `ELEVENLABS_API_KEY`, with your actual API keys. ## Environment Variables Explainer -This document provides an overview and explanation of the environment variables used in the News Reader application. Each variable's purpose, default value (if applicable), and example usage are provided below. +This section provides an overview and explanation of the environment variables used in the News Reader application. ### General Configuration -#### `OPENAI_API_KEY` -- **Description:** API key for OpenAI, used for generating news scripts via the GPT model.
+ - **Example:** `sk-abc123` + +- **`ELEVENLABS_API_KEY`**: API key for ElevenLabs, used for TTS voice generation. + - **Example:** `abc123` -#### `NEWS_READER_CRON` -- **Description:** Cron expression to schedule the news generation. If not set, the script runs once. -- **Default:** N/A -- **Example:** `13,28,43,58 * * * *` (fires two minutes before every quarter hour) +- **`NEWS_READER_CRON`**: Cron expression to schedule the news generation. If not set, the script runs once. + - **Example:** `13,28,43,58 * * * *` -#### `NEWS_READER_RSS` -- **Description:** URL of the RSS feed to parse. -- **Default:** `https://raw.githubusercontent.com/troykelly/live-news-break/main/demo.xml` -- **Example:** `https://example.com/rss-feed` +- **`NEWS_READER_RSS`**: URL of the RSS feed to parse. + - **Default:** `https://raw.githubusercontent.com/troykelly/live-news-break/main/demo.xml` + - **Example:** `https://example.com/rss-feed` -#### `NEWS_READER_OUTPUT_DIR` -- **Description:** Directory where the generated audio files are saved. -- **Default:** `.` -- **Example:** `/output` +- **`NEWS_READER_OUTPUT_DIR`**: Directory where the generated audio files are saved. + - **Default:** `.` + - **Example:** `/output` -#### `NEWS_READER_OUTPUT_FILE` -- **Description:** File name template for the output audio file. Supports placeholders: `%Y%`, `%m%`, `%d%`, `%H%`, `%M%`, `%S%`, `%EXT%`. -- **Default:** `livenews.%EXT%` -- **Example:** `news_%Y%_%m%_%d%_%H%_%M%_%S%.mp3` +- **`NEWS_READER_OUTPUT_FILE`**: File name template for the output audio file. Supports placeholders: `%Y%`, `%m%`, `%d%`, `%H%`, `%M%`, `%S%`, `%EXT%`. + - **Default:** `livenews.%EXT%` + - **Example:** `news_%Y%_%m%_%d%_%H%_%M%_%S%.mp3` -#### `NEWS_READER_OUTPUT_LINK` -- **Description:** Path to create a symbolic link pointing to the latest output file. If not set, no symbolic link is created. 
-- **Example:** `/path/to/latest_news.mp3` +- **`NEWS_READER_OUTPUT_LINK`**: Path to create a symbolic link pointing to the latest output file. If not set, no symbolic link is created. + - **Example:** `/path/to/latest_news.mp3` ### Station Configuration -#### `NEWS_READER_STATION_NAME` -- **Description:** Name of the radio station. -- **Default:** `Live News 24` -- **Example:** `News Update Radio` +- **`NEWS_READER_STATION_NAME`**: Name of the radio station. + - **Default:** `Live News 24` + - **Example:** `News Update Radio` -#### `NEWS_READER_READER_NAME` -- **Description:** Name of the news reader. -- **Default:** `Burnie Housedown` -- **Example:** `OpenAI Shimmer` +- **`NEWS_READER_READER_NAME`**: Name of the news reader. + - **Default:** `Burnie Housedown` + - **Example:** `OpenAI Shimmer` -#### `NEWS_READER_STATION_CITY` -- **Description:** City where the station is located. -- **Default:** `Sydney` -- **Example:** `Melbourne` +- **`NEWS_READER_STATION_CITY`**: City where the station is located. + - **Default:** `Sydney` + - **Example:** `Melbourne` -#### `NEWS_READER_STATION_COUNTRY` -- **Description:** Country where the station is located. -- **Default:** `Australia` -- **Example:** `United States` +- **`NEWS_READER_STATION_COUNTRY`**: Country where the station is located. + - **Default:** `Australia` + - **Example:** `United States` ### Audio Configuration -#### `NEWS_READER_TTS_VOICE` -- **Description:** Voice to be used by the text-to-speech service. -- **Default:** `alloy` -- **Example:** `shimmer` +- **`NEWS_READER_TTS_VOICE`**: Voice to be used by the text-to-speech service. + - **Default:** `alloy` + - **Example:** `shimmer` -#### `NEWS_READER_TTS_QUALITY` -- **Description:** Quality settings for the TTS. -- **Default:** `tts-1` -- **Example:** `tts-1-hd` +- **`NEWS_READER_TTS_MODEL`**: Model settings for the TTS. 
+ - **Default:** `tts-1` + - **Example:** `tts-1-hd` -#### `NEWS_READER_OUTPUT_FORMAT` -- **Description:** Format for the output audio file. -- **Default:** `flac` -- **Example:** `mp3` +- **`NEWS_READER_TTS_PROVIDER`**: TTS provider to use. + - **Default:** `openai` + - **Example:** `elevenlabs` -#### Audio Files +- **`NEWS_READER_OUTPUT_FORMAT`**: Format for the output audio file. + - **Default:** `flac` + - **Example:** `mp3` -##### Required Audio Files +### Audio Files -- `NEWS_READER_AUDIO_INTRO`: Path to the introduction audio file. +- **`NEWS_READER_AUDIO_INTRO`**: Path to the introduction audio file. - **Example:** `audio/intro.wav` -- `NEWS_READER_AUDIO_OUTRO`: Path to the outro audio file. +- **`NEWS_READER_AUDIO_OUTRO`**: Path to the outro audio file. - **Example:** `audio/outro.wav` -- `NEWS_READER_AUDIO_FIRST`: Path to the first news article audio file. +- **`NEWS_READER_AUDIO_FIRST`**: Path to the first news article audio file. - **Example:** `audio/first.wav` -- `NEWS_READER_AUDIO_BREAK`: Path to the break between articles audio file. +- **`NEWS_READER_AUDIO_BREAK`**: Path to the break between articles audio file. - **Example:** `audio/break.wav` -- `NEWS_READER_AUDIO_BED`: Path to the bed music file. +- **`NEWS_READER_AUDIO_BED`**: Path to the bed music file. - **Example:** `audio/bed.wav` -##### Timing Configuration +### Timing Configuration -- `NEWS_READER_TIMING_INTRO`: Timing offset for introduction. - - **Default:** `None` +- **`NEWS_READER_TIMING_INTRO`**: Timing offset for introduction. - **Example:** `16500` -- `NEWS_READER_TIMING_OUTRO`: Timing offset for outro. - - **Default:** `None` +- **`NEWS_READER_TIMING_OUTRO`**: Timing offset for outro. - **Example:** `8500` -- `NEWS_READER_TIMING_BREAK`: Timing offset for break. - - **Default:** `None` +- **`NEWS_READER_TIMING_BREAK`**: Timing offset for break. - **Example:** `1600` -- `NEWS_READER_TIMING_FIRST`: Timing offset for the first article. 
- - **Default:** `None` +- **`NEWS_READER_TIMING_FIRST`**: Timing offset for the first article. - **Example:** `3300` -- `NEWS_READER_TIMING_BED`: Timing offset for bed music. - - **Default:** `None` +- **`NEWS_READER_TIMING_BED`**: Timing offset for bed music. - **Example:** `-500` -##### Gain Configuration +### Gain Configuration -- `NEWS_READER_GAIN_VOICE`: Gain for voice audio. - - **Default:** `None` +- **`NEWS_READER_GAIN_VOICE`**: Gain for voice audio. - **Example:** `-3` -- `NEWS_READER_GAIN_INTRO`: Gain for introduction audio. - - **Default:** `None` +- **`NEWS_READER_GAIN_INTRO`**: Gain for introduction audio. - **Example:** `-6` -- `NEWS_READER_GAIN_OUTRO`: Gain for outro audio. - - **Default:** `None` +- **`NEWS_READER_GAIN_OUTRO`**: Gain for outro audio. - **Example:** `-6` -- `NEWS_READER_GAIN_BREAK`: Gain for break audio. - - **Default:** `None` +- **`NEWS_READER_GAIN_BREAK`**: Gain for break audio. - **Example:** `-6` -- `NEWS_READER_GAIN_FIRST`: Gain for the first article audio. - - **Default:** `None` +- **`NEWS_READER_GAIN_FIRST`**: Gain for the first article audio. - **Example:** `-6` -- `NEWS_READER_GAIN_BED`: Gain for bed music audio. - - **Default:** `None` +- **`NEWS_READER_GAIN_BED`**: Gain for bed music audio. - **Example:** `-15` -##### Fade Configuration +### Fade Configuration -- `NEWS_READER_FADEIN_INTRO`: Fade-in duration for introduction. - - **Default:** `None` +- **`NEWS_READER_FADEIN_INTRO`**: Fade-in duration for introduction. - **Example:** `1000` -- `NEWS_READER_FADEIN_OUTRO`: Fade-in duration for outro. - - **Default:** `None` +- **`NEWS_READER_FADEIN_OUTRO`**: Fade-in duration for outro. - **Example:** `1000` -- `NEWS_READER_FADEIN_BREAK`: Fade-in duration for break. - - **Default:** `None` +- **`NEWS_READER_FADEIN_BREAK`**: Fade-in duration for break. - **Example:** `1000` -- `NEWS_READER_FADEIN_FIRST`: Fade-in duration for the first article. 
- - **Default:** `None` +- **`NEWS_READER_FADEIN_FIRST`**: Fade-in duration for the first article. - **Example:** `1000` -- `NEWS_READER_FADEIN_BED`: Fade-in duration for bed music. - - **Default:** `None` +- **`NEWS_READER_FADEIN_BED`**: Fade-in duration for bed music. - **Example:** `0` - -- `NEWS_READER_FADEOUT_INTRO`: Fade-out duration for introduction. - - **Default:** `None` +- **`NEWS_READER_FADEOUT_INTRO`**: Fade-out duration for introduction. - **Example:** `1000` -- `NEWS_READER_FADEOUT_OUTRO`: Fade-out duration for outro. - - **Default:** `None` +- **`NEWS_READER_FADEOUT_OUTRO`**: Fade-out duration for outro. - **Example:** `1000` -- `NEWS_READER_FADEOUT_BREAK`: Fade-out duration for break. - - **Default:** `None` +- **`NEWS_READER_FADEOUT_BREAK`**: Fade-out duration for break. - **Example:** `1000` -- `NEWS_READER_FADEOUT_FIRST`: Fade-out duration for the first article. - - **Default:** `None` +- **`NEWS_READER_FADEOUT_FIRST`**: Fade-out duration for the first article. - **Example:** `1000` -- `NEWS_READER_FADEOUT_BED`: Fade-out duration for bed music. - - **Default:** `None` +- **`NEWS_READER_FADEOUT_BED`**: Fade-out duration for bed music. - **Example:** `500` ### Lexicon Configuration -#### `NEWS_READER_LEXICON_JSON` -- **Description:** Path to the lexicon JSON file for text conversion. -- **Default:** `./lexicon.json` -- **Example:** `/path/to/lexicon.json` +- **`NEWS_READER_LEXICON_JSON`**: Path to the lexicon JSON file for text conversion. + - **Default:** `./lexicon.json` + - **Example:** `/path/to/lexicon.json` ### Weather Data Configuration -#### `NEWS_READER_WEATHER_JSON` -- **Description:** Path to the weather data JSON file. -- **Default:** `./weather.json` -- **Example:** `/path/to/weather.json` +- **`NEWS_READER_WEATHER_JSON`**: Path to the weather data JSON file. 
+ - **Default:** `./weather.json` + - **Example:** `/path/to/weather.json` -#### Bureau of Meteorology (BOM) Configuration +### Bureau of Meteorology (BOM) Configuration -- `NEWS_READER_BOM_PRODUCT_ID`: BOM product ID for weather data. +- **`NEWS_READER_BOM_PRODUCT_ID`**: BOM product ID for weather data. - **Default:** `IDN10064` - **Example:** `IDN10064` -#### OpenWeather Configuration +### OpenWeather Configuration -- `OPENWEATHER_API_KEY`: API key for OpenWeatherMap. +- **`OPENWEATHER_API_KEY`**: API key for OpenWeatherMap. - **Example:** `abc123` -- `OPENWEATHER_LAT`: Latitude for the weather location. +- **`OPENWEATHER_LAT`**: Latitude for the weather location. - **Example:** `-33.8688` -- `OPENWEATHER_LON`: Longitude for the weather location. +- **`OPENWEATHER_LON`**: Longitude for the weather location. - **Example:** `151.2093` -- `OPENWEATHER_UNITS`: Units for weather data (standard, metric, imperial). +- **`OPENWEATHER_UNITS`**: Units for weather data (standard, metric, imperial). 
- **Default:** `metric` - **Example:** `metric` @@ -284,8 +252,9 @@ This document provides an overview and explanation of the environment variables Here's an example environment configuration you can use in your Docker Compose file or `.env` file: -```text +```dotenv OPENAI_API_KEY=sk-abc123 +ELEVENLABS_API_KEY=elevenlabs-abc123 NEWS_READER_CRON=13,28,43,58 * * * * NEWS_READER_RSS=https://example.com/rss-feed NEWS_READER_OUTPUT_DIR=/output @@ -295,8 +264,9 @@ NEWS_READER_STATION_NAME=News Update Radio NEWS_READER_READER_NAME=OpenAI Shimmer NEWS_READER_STATION_CITY=Sydney NEWS_READER_STATION_COUNTRY=Australia -NEWS_READER_TTS_VOICE=shimmer -NEWS_READER_TTS_QUALITY=tts-1-hd +NEWS_READER_TTS_VOICE=Stuart - Energetic and enthusiastic +NEWS_READER_TTS_MODEL=eleven_turbo_v2 +NEWS_READER_TTS_PROVIDER=elevenlabs NEWS_READER_OUTPUT_FORMAT=mp3 NEWS_READER_AUDIO_INTRO=audio/intro.wav NEWS_READER_AUDIO_OUTRO=audio/outro.wav diff --git a/requirements.txt b/requirements.txt index 3be5114..4166c7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,31 +1,8 @@ -annotated-types==0.7.0 -anyio==4.3.0 -certifi==2024.2.2 -charset-normalizer==3.3.2 croniter==2.0.5 -distro==1.9.0 feedparser==6.0.11 -gitdb==4.0.11 -GitPython==3.1.43 -h11==0.14.0 -httpcore==1.0.5 -httpx==0.27.0 -idna==3.7 mutagen==1.47.0 openai==1.30.1 pyacoustid==1.3.0 -pydantic==2.7.1 -pydantic_core==2.18.2 pydub==0.25.1 -python-dateutil==2.9.0.post0 pytz==2024.1 -requests==2.32.2 -setuptools==70.0.0 -sgmllib3k==1.0.0 -six==1.16.0 -smmap==5.0.1 -sniffio==1.3.1 -tqdm==4.66.4 -typing_extensions==4.11.0 -urllib3==2.2.1 -wheel==0.43.0 +Requests==2.32.2 diff --git a/src/main.py b/src/main.py index b373060..296b00b 100644 --- a/src/main.py +++ b/src/main.py @@ -25,6 +25,8 @@ from mutagen.flac import FLAC from croniter import croniter from io import BytesIO +from typing import List + logging.basicConfig(level=logging.INFO) @@ -96,6 +98,9 @@ DEFAULT_CACHE_DIR = os.getenv('NEWS_READER_DEFAULT_CACHE_DIR', '/tmp') 
DEFAULT_CACHE_TTL = os.getenv('NEWS_READER_DEFAULT_CACHE_TTL', 3600) +# ElevenLabs Variable configuration +ELEVENLABS_API_KEY = os.getenv('ELEVENLABS_API_KEY') + # RSS Feed configuration FEED_CONFIG = { 'TITLE': 'title', @@ -766,49 +771,6 @@ def clean_script(script): cleaned_lines.append(line) return "\n".join(cleaned_lines) -def generate_speech(news_script_chunk, api_key, voice, quality, output_format): - """Generates spoken audio from the given text script chunk using OpenAI's TTS API. - - Args: - news_script_chunk (str): News script chunk to be converted to audio. - api_key (str): OpenAI API key. - voice (str): Chosen voice for the TTS. - quality (str): Chosen quality for the TTS (e.g. 'tts-1' or 'tts-1-hd'). - output_format (str): Desired output audio format. - - Returns: - AudioSegment: The generated audio segment. - """ - text_hash = generate_hash(news_script_chunk) - cached_audio = get_cached_audio(text_hash, output_format) - - if cached_audio: - logging.info(f"Using cached audio for hash: {text_hash}") - return cached_audio - - client = OpenAI(api_key=api_key) - - processed_text = process_text_for_tts(news_script_chunk) - logging.info(f"Processed text for TTS: {processed_text}") - - try: - response = client.audio.speech.create( - model=quality, - voice=voice, - input=processed_text, - response_format=output_format - ) - - # Create a new AudioSegment object from the returned Audio - audio = AudioSegment.from_file(BytesIO(response.content), format=output_format) - - normalized_audio = audio.apply_gain(-audio.max_dBFS) # Normalize to 0 dBFS - cache_audio(text_hash, normalized_audio.export(format=output_format).read(), output_format) - - return normalized_audio - except openai.OpenAIError as e: - raise openai.OpenAIError(f"An error occurred with the OpenAI TTS API: {e}") - def read_prompt_file(file_path): """Reads the contents of a prompt file with error handling. 
@@ -1018,13 +980,174 @@ def submit_to_musicbrainz(output_file_path, output_format, metadata, user_key, a except ValueError: raise RuntimeError(response.text) +def openai_segments_to_speech( + segments: List[str], + api_key: str, + voice: str, + model: str, + voice_settings: "dict | None" = None, +) -> List[AudioSegment]: + """Generate speech using OpenAI API. + + Args: + segments (list of str): The phrases to be converted to speech. + api_key (str): OpenAI API key. + voice (str): Chosen voice for the TTS. + model (str): Model version for the TTS. + voice_settings (dict, optional): Voice settings for the TTS. + + Returns: + list of AudioSegment: Ordered list of `AudioSegment` objects representing the converted phrases. + + Raises: + Exception: If the API request fails. + """ + audio_segments: List[AudioSegment] = [] + previous_request_ids: List[str] = [] + + openai_client = OpenAI(api_key=api_key) + + for i, segment in enumerate(segments): + is_first_segment = i == 0 + is_last_segment = i == len(segments) - 1 + + response = openai_client.audio.speech.create( + model=model, + voice=voice, + input=segment, + response_format='flac' + ) + logging.info(f"Successfully converted segment {i + 1}/{len(segments)}") + audio_segment = AudioSegment.from_file(BytesIO(response.content)) + normalized_audio = audio_segment.apply_gain(-audio_segment.max_dBFS) + audio_segments.append(normalized_audio) + + return audio_segments + +def elevenlabs_segments_to_speech( + segments: List[str], + api_key: str, + voice: str, + model: str, + voice_settings: "dict | None" = None, +) -> List[AudioSegment]: + """Generate speech using ElevenLabs API. + + Args: + segments (list of str): The phrases to be converted to speech. + api_key (str): ElevenLabs API key. + voice (str): Chosen voice for the TTS. + model (str): Model version for the TTS. + voice_settings (dict, optional): Voice settings for the TTS. + + Returns: + list of AudioSegment: Ordered list of `AudioSegment` objects representing the converted phrases.
+ + Raises: + Exception: If the API request fails. + """ + api_url = "https://api.elevenlabs.io" + api_version = "v1" + api_endpoints = { + "voices": f"{api_url}/{api_version}/voices", + "generate": f"{api_url}/{api_version}/generate" + } + + api_headers = { + "Accept": "application/json", + "xi-api-key": api_key, + "Content-Type": "application/json", + } + + try: + # Get list of voices + voices_response = requests.get(api_endpoints["voices"], headers=api_headers) + voices_response.raise_for_status() + voices_data = voices_response.json() + elevenlabs_voices = voices_data['voices'] + except requests.RequestException as e: + logging.error(f"Failed to fetch data from ElevenLabs: {e}") + logging.error(traceback.format_exc()) + raise + + # Find the voice_id for the specified voice name + voice_id = next((v['voice_id'] for v in elevenlabs_voices if v['name'].lower() == voice.lower()), None) + if not voice_id: + raise ValueError(f"The specified voice '{voice}' does not exist in ElevenLabs.") + + audio_segments: List[AudioSegment] = [] + previous_request_ids: List[str] = [] + + for i, segment in enumerate(segments): + is_first_segment = i == 0 + is_last_segment = i == len(segments) - 1 + response = requests.post( + f"{api_url}/{api_version}/text-to-speech/{voice_id}/stream", + json={ + "text": segment, + "model_id": model, + "output_format": 'pcm_44100', + # A maximum of three next or previous history item ids can be sent + "previous_request_ids": previous_request_ids[-3:], + "previous_text": None if is_first_segment else " ".join(segments[:i]), + "next_text": None if is_last_segment else " ".join(segments[i + 1:]) + }, + headers={"xi-api-key": api_key}, + ) + + if response.status_code != 200: + raise RuntimeError(f"Error encountered, status: {response.status_code}, " + f"content: {response.text}") + + logging.info(f"Successfully converted segment {i + 1}/{len(segments)}") + previous_request_ids.append(response.headers["request-id"]) + audio_segment =
AudioSegment.from_file(BytesIO(response.content)) + normalized_audio = audio_segment.apply_gain(-audio_segment.max_dBFS) + audio_segments.append(normalized_audio) + + return audio_segments + +def generate_voice_options(provider_name): + """Generate voice options for a specific TTS provider from environment variables. + + Args: + provider_name (str): The name of the TTS provider (e.g., 'ELEVENLABS'). + + Returns: + dict: A dictionary of voice options extracted from the environment variables. + """ + voice_options = {} + env_prefix = f"{provider_name}_".upper() + + for key, value in os.environ.items(): + if key.startswith(env_prefix): + # Remove the provider prefix and convert to lower case + option_key = key[len(env_prefix):].lower() + + # Convert "True" and "False" to boolean + if value.lower() == 'true': + option_value = True + elif value.lower() == 'false': + option_value = False + else: + # Attempt to convert to float or integer if possible, else leave as string + try: + option_value = float(value) if '.' 
in value else int(value) + except ValueError: + option_value = value + + voice_options[option_key] = option_value + + return voice_options + def generate_news_audio(): """Function to handle the news generation and audio output.""" feed_url = os.getenv('NEWS_READER_RSS', 'https://raw.githubusercontent.com/troykelly/live-news-break/main/demo.xml') station_name = os.getenv('NEWS_READER_STATION_NAME', 'Live News 24') reader_name = os.getenv('NEWS_READER_READER_NAME', 'Burnie Housedown') tts_voice = os.getenv('NEWS_READER_TTS_VOICE', 'alloy') - tts_quality = os.getenv('NEWS_READER_TTS_QUALITY', 'tts-1') + tts_model = os.getenv('NEWS_READER_TTS_MODEL', 'tts-1') + tts_provider = os.getenv('NEWS_READER_TTS_PROVIDER', 'openai') output_format = os.getenv('NEWS_READER_OUTPUT_FORMAT', 'flac') openai_api_key = os.getenv('OPENAI_API_KEY') @@ -1134,6 +1257,27 @@ def generate_news_audio(): article_start_time = None article_end_time = None + + vo_segments_text: List[str] = [] + + # Create the list of segments and generate them + for section in script_sections: + if not section in placeholder_to_key: + vo_segments_text.append(process_text_for_tts(section)) + + voice_provider_options = generate_voice_options(tts_provider) + + try: + if tts_provider == "elevenlabs": + vo_segments = elevenlabs_segments_to_speech(vo_segments_text, ELEVENLABS_API_KEY, tts_voice, tts_model, voice_provider_options) + elif tts_provider == "openai": + vo_segments = openai_segments_to_speech(vo_segments_text, openai_api_key, tts_voice, tts_model, voice_provider_options) + else: + raise ValueError(f"Unsupported TTS provider: {tts_provider}") + except Exception as e: + raise Exception(f"An error occurred with {tts_provider} TTS: {e}") + + current_speech_segment = 0 while current_index < len(script_sections): section = script_sections[current_index] @@ -1145,7 +1289,8 @@ def generate_news_audio(): if sfx_file: if current_index + 1 < len(script_sections): speech_text = script_sections[current_index + 1] - 
speech_audio = generate_speech(speech_text, openai_api_key, tts_voice, tts_quality, output_format) + # speech_audio = generate_speech(speech_text, openai_api_key, tts_voice, tts_quality, output_format) + speech_audio = vo_segments[current_speech_segment] mixed_audio, speech_start_time = generate_mixed_audio_and_track_timestamps(sfx_file, speech_audio, timing_value, total_elapsed_time * 1000) final_audio += mixed_audio @@ -1171,8 +1316,9 @@ def generate_news_audio(): logging.warning(f"No SFX file for {section}") current_index += 1 else: - speech_audio = generate_speech(section, openai_api_key, tts_voice, tts_quality, output_format) - + # speech_audio = generate_speech(section, openai_api_key, tts_voice, tts_quality, output_format) + speech_audio = vo_segments[current_speech_segment] + final_audio += speech_audio current_index += 1 @@ -1183,6 +1329,8 @@ def generate_news_audio(): timestamps.append(format_timestamp(total_elapsed_time)) lyrics_text.append(section) + current_speech_segment += 1 + # Post-process to add music bed bed_file = audio_files.get("BED", None) if bed_file and article_start_time is not None and article_end_time is not None: