From 396413c5497c69e99c6a2018476b9e758793ce33 Mon Sep 17 00:00:00 2001 From: Christopher Aedo Date: Mon, 20 Nov 2023 12:17:31 -0800 Subject: [PATCH 1/2] first test with xtts --- epub2tts.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/epub2tts.py b/epub2tts.py index 5dc6354..13ad484 100644 --- a/epub2tts.py +++ b/epub2tts.py @@ -44,7 +44,6 @@ device = "cpu" print(f"Using device: {device}") -model_name = "tts_models/en/vctk/vits" blacklist = ['[document]', 'noscript', 'header', 'html', 'meta', 'head', 'input', 'script'] ffmetadatafile = "FFMETADATAFILE" @@ -244,6 +243,12 @@ def combine_sentences(sentences, length=3500): yield combined def main(): + if "--xtts" in sys.argv: + model_name = "tts_models/multilingual/multi-dataset/xtts_v2" + index = sys.argv.index("--xtts") + speaker_wav = sys.argv[index + 1] + else: + model_name = "tts_models/en/vctk/vits" bookname = get_bookname() #detect .txt, .epub or https booktype = bookname.split('.')[-1] speaker_used = get_speaker() @@ -318,7 +323,26 @@ def main(): for f in tempfiles: os.remove(f) else: - tts.tts_to_file(text = chapters_to_read[i], speaker = speaker_used, file_path = outputwav) + if "--xtts" in sys.argv: + + tempfiles = [] + segmenter = pysbd.Segmenter(language="en", clean=True) + sentences = segmenter.segment(chapters_to_read[i]) + sentence_groups = list(combine_sentences(sentences), 250) + for x in range(len(sentence_groups)): + tempwav = "temp" + str(x) + ".wav" + print(sentence_groups[x]) + tts.tts_to_file(text=sentence_groups[x], speaker_wav = speaker_wav, file_path=tempwav, language="en") + tempfiles.append(tempwav) + tempwavfiles = [AudioSegment.from_mp3(f"{f}") for f in tempfiles] + concatenated = sum(tempwavfiles) + concatenated.export(outputwav, format="wav") +# for f in tempfiles: +# os.remove(f) + + else: + tts.tts_to_file(text = chapters_to_read[i], speaker = speaker_used, file_path = outputwav) + files.append(outputwav) position += 
len(chapters_to_read[i]) From 70effba8990ed063c6d04dc30dacb3d5a93194c5 Mon Sep 17 00:00:00 2001 From: Christopher Aedo Date: Mon, 20 Nov 2023 20:50:07 -0800 Subject: [PATCH 2/2] Working great, needs cleanup though --- README.md | 2 ++ epub2tts.py | 23 ++++++++++++----------- setup.py | 2 +- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index ddb7c1b..630156c 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ Usage: URL: `epub2tts --url https://www.example.com/page --name example-page` +To use Coqui XTTS, add: `--xtts <sample.wav>` (GPU absolutely required, and even then it's slow but sounds amazing!) + To use OpenAI TTS, add: `--openai ` (Use speaker option to specify voice other than onyx: `--speaker shimmer`) To change speaker (ex p307 for a good male voice), add: `--speaker p307` diff --git a/epub2tts.py b/epub2tts.py index 13ad484..64a6d2d 100644 --- a/epub2tts.py +++ b/epub2tts.py @@ -57,6 +57,8 @@ helpful for finding which chapter to start and end on if you want to skip TOC, bibliography, etc. +To use Coqui XTTS, add: --xtts <sample.wav> (GPU absolutely required, and even then it's slow but sounds amazing!) 
+To use OpenAI TTS, add: --openai (Use speaker option to specify voice other than onyx: `--speaker shimmer`) To change speaker (ex p307 for a good male voice), add: --speaker p307 To output in mp3 format instead of m4b, add: --mp3 To skip reading any links, add: --skip-links @@ -135,10 +137,11 @@ def get_speaker(): if "--speaker" in sys.argv: index = sys.argv.index("--speaker") speaker_used = sys.argv[index + 1] - else: - if "--openai" in sys.argv: + elif "--openai" in sys.argv: speaker_used = "onyx" - else: + elif "--xtts" in sys.argv: + speaker_used = "xtts" + else: speaker_used = "p335" print(f"Speaker: {speaker_used}") return(speaker_used) @@ -162,9 +165,8 @@ def get_chapters_epub(book, bookname): for i in range(len(chapters)): #strip some characters that might have caused TTS to choke text = chap2text(chapters[i]) - #this still misses a lot of special characters... - #text = text.translate({ord(c): None for c in '[]*“”"\''}) - allowed_chars = string.ascii_letters + string.digits + '-,.!? ' + text = text.replace("—", ", ") + allowed_chars = string.ascii_letters + string.digits + "-,.!? '" text = ''.join(c for c in text if c in allowed_chars) if len(text) < 150: #too short to bother with @@ -324,21 +326,20 @@ def main(): os.remove(f) else: if "--xtts" in sys.argv: - +#look at all this disgusting duplicated code! FIX IT!!! 
tempfiles = [] segmenter = pysbd.Segmenter(language="en", clean=True) sentences = segmenter.segment(chapters_to_read[i]) - sentence_groups = list(combine_sentences(sentences), 250) + sentence_groups = list(combine_sentences(sentences, 1000)) for x in range(len(sentence_groups)): tempwav = "temp" + str(x) + ".wav" - print(sentence_groups[x]) tts.tts_to_file(text=sentence_groups[x], speaker_wav = speaker_wav, file_path=tempwav, language="en") tempfiles.append(tempwav) tempwavfiles = [AudioSegment.from_mp3(f"{f}") for f in tempfiles] concatenated = sum(tempwavfiles) concatenated.export(outputwav, format="wav") -# for f in tempfiles: -# os.remove(f) + for f in tempfiles: + os.remove(f) else: tts.tts_to_file(text = chapters_to_read[i], speaker = speaker_used, file_path = outputwav) diff --git a/setup.py b/setup.py index 65a7405..2d6c0d0 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ author_email='doc@aedo.net', url='https://github.com/aedocw/epub2tts', license='Apache License, Version 2.0', - version='1.4.0', + version='1.5.0', packages=find_packages(), install_requires=requirements, py_modules=['epub2tts'],