diff --git a/README.md b/README.md
index ce2b040..ddb7c1b 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@ This script takes an epub (or text file) and reads it to an mp3 or an m4b audiob
 
 I recognize this is not very user friendly, but I wanted to share in case folks thought it was useful. If there are a few more people than myself that find this is useful I will keep working on turning it into something that could be used by someone without dev experience.
 
+**NOTE:** Now with [OpenAI TTS](https://platform.openai.com/docs/guides/text-to-speech) support! It's not free, but the average cost for a few books I tested was around $7. If you use the `--openai <OPENAI_API_KEY>` flag, epub2tts will provide a cost estimate and prompt you to approve before continuing.
+
 **NOTE:** HUGE thanks to a recent PR from [wonka929](https://github.com/wonka929), epub2tts now recognizes when a CUDA GPU is available and will use it automatically. In a brief test I did, the speedup was incredible!
 
 ## USAGE:
@@ -13,6 +15,8 @@ Usage:
 
 URL: `epub2tts --url https://www.example.com/page --name example-page`
 
+To use OpenAI TTS, add: `--openai <OPENAI_API_KEY>` (Use speaker option to specify voice other than onyx: `--speaker shimmer`)
+
 To change speaker (ex p307 for a good male voice), add: `--speaker p307`
 
 To output in mp3 format instead of m4b, add: `--mp3`
diff --git a/epub2tts.py b/epub2tts.py
index 04ae106..5dc6354 100644
--- a/epub2tts.py
+++ b/epub2tts.py
@@ -28,8 +28,11 @@
 from ebooklib import epub
 from newspaper import Article
 from pydub import AudioSegment
+import pysbd
 from TTS.api import TTS
 import torch, gc
+from openai import OpenAI
+
 
 # Verify if CUDA or mps is available and select it
 if torch.cuda.is_available():
@@ -134,7 +137,10 @@ def get_speaker():
         index = sys.argv.index("--speaker")
         speaker_used = sys.argv[index + 1]
     else:
-        speaker_used = "p335"
+        if "--openai" in sys.argv:
+            speaker_used = "onyx"
+        else:
+            speaker_used = "p335"
     print(f"Speaker: {speaker_used}")
     return(speaker_used)
 
@@ -219,10 +225,48 @@ def get_end(chapters_to_read):
         end = len(chapters_to_read)
     return(end)
 
+def get_api_key():
+    """Return the OpenAI API key that follows ``--openai`` on the command line.
+
+    Returns '' when ``--openai`` was not given. Exits with a usage message when
+    the flag is present but no key follows it.
+    """
+    if "--openai" not in sys.argv:
+        return('')
+    index = sys.argv.index("--openai")
+    # A missing value would raise IndexError, and a following option such as
+    # --mp3 would silently be used as the key, so reject both up front.
+    if index + 1 >= len(sys.argv) or sys.argv[index + 1].startswith("--"):
+        sys.exit("--openai requires an API key: --openai <OPENAI_API_KEY>")
+    # Never echo the key: it is a secret and stdout often ends up in logs.
+    return(sys.argv[index + 1])
+
+def combine_sentences(sentences, length=3500):
+    """Yield space-joined groups of sentences of about ``length`` characters.
+
+    A sentence longer than ``length`` is yielded as its own group; empty
+    groups are never yielded.
+    """
+    combined = ""
+    for sentence in sentences:
+        if len(combined) + len(sentence) <= length:
+            combined += sentence + " "
+        else:
+            # Skip the empty group produced when the very first sentence is
+            # already over the limit.
+            if combined:
+                yield combined
+            # Keep the trailing space so the next sentence appended to this
+            # group does not run into it.
+            combined = sentence + " "
+    if combined:
+        yield combined
+
 def main():
     bookname = get_bookname() #detect .txt, .epub or https
     booktype = bookname.split('.')[-1]
     speaker_used = get_speaker()
+    openai_api_key = get_api_key()
     if booktype == "epub":
         book = epub.read_epub(bookname)
         chapters_to_read = get_chapters_epub(book, bookname)
@@ -249,17 +293,52 @@ def main():
     start = get_start()
     end = get_end(chapters_to_read)
     total_chars = get_length(start, end, chapters_to_read)
+    print("Total characters: " + str(total_chars))
+    if "--openai" in sys.argv:
+        while True:
+            openai_sdcost = (total_chars/1000) * 0.015
+            print("OpenAI TTS SD Cost: $" + str(openai_sdcost))
+            user_input = input("This will not be free, continue? (y/n): ")
+            if user_input.lower() not in ['y', 'n']:
+                print("Invalid input. Please enter y for yes or n for no.")
+            elif user_input.lower() == 'n':
+                sys.exit()
+            else:
+                print("Continuing...")
+                break
     files = []
     position = 0
     start_time = time.time()
-    tts = TTS(model_name).to(device)
+    if "--openai" in sys.argv:
+        client = OpenAI(api_key=openai_api_key)
+    else:
+        tts = TTS(model_name).to(device)
+
     for i in range(start, end):
         outputwav = bookname.split(".")[0]+"-"+str(i+1)+".wav"
         print("Reading " + str(i))
         if os.path.isfile(outputwav):
             print(outputwav + " exists, skipping to next chapter")
         else:
-            tts.tts_to_file(text = chapters_to_read[i], speaker = speaker_used, file_path = outputwav)
+            if "--openai" in sys.argv:
+                tempfiles = []
+                segmenter = pysbd.Segmenter(language="en", clean=True)
+                sentences = segmenter.segment(chapters_to_read[i])
+                sentence_groups = list(combine_sentences(sentences))
+                for x in range(len(sentence_groups)):
+                    tempwav = "temp" + str(x) + ".mp3"
+                    print(sentence_groups[x])
+                    response = client.audio.speech.create( model="tts-1", voice=speaker_used, input=sentence_groups[x])
+                    response.stream_to_file(tempwav)
+                    tempfiles.append(tempwav)
+                tempwavfiles = [AudioSegment.from_mp3(f"{f}") for f in tempfiles]
+                concatenated = sum(tempwavfiles)
+                concatenated.export(outputwav, format="wav")
+                for f in tempfiles:
+                    os.remove(f)
+            else:
+                tts.tts_to_file(text = chapters_to_read[i], speaker = speaker_used, file_path = outputwav)
+
         files.append(outputwav)
         position += len(chapters_to_read[i])
         percentage = (position / total_chars) *100
diff --git a/requirements.txt b/requirements.txt
index 8fbf414..bb31778 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,7 @@
 TTS
 ebooklib
 beautifulsoup4
+openai
 pydub
+pysbd
 newspaper3k
diff --git a/setup.py b/setup.py
index ef289da..65a7405 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@
     author_email='doc@aedo.net',
     url='https://github.com/aedocw/epub2tts',
     license='Apache License, Version 2.0',
-    version='1.3.12',
+    version='1.4.0',
     packages=find_packages(),
     install_requires=requirements,
     py_modules=['epub2tts'],