Skip to content

Commit

Permalink
Merge pull request #65 from aedocw/main
Browse files Browse the repository at this point in the history
Merging from main for docker image build.
  • Loading branch information
aedocw authored Nov 21, 2023
2 parents b6be765 + 69a3961 commit d1b8908
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 9 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Usage:

URL: `epub2tts --url https://www.example.com/page --name example-page`

To use Coqui XTTS, add: `--xtts <sample.wav>` (a GPU is absolutely required, and even then it is slow, but it sounds amazing!)

To use OpenAI TTS, add: `--openai <your API key>` (use the speaker option to specify a voice other than onyx, e.g. `--speaker shimmer`)

To change the speaker (e.g. p307 for a good male voice), add: `--speaker p307`
Expand Down
41 changes: 33 additions & 8 deletions epub2tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
device = "cpu"
print(f"Using device: {device}")

model_name = "tts_models/en/vctk/vits"
blacklist = ['[document]', 'noscript', 'header', 'html', 'meta', 'head', 'input', 'script']
ffmetadatafile = "FFMETADATAFILE"

Expand All @@ -58,6 +57,8 @@
helpful for finding which chapter to start and end on if you want to
skip TOC, bibliography, etc.
To use Coqui XTTS, add: --xtts <sample.wav> (GPU absolutely required, and even then it's slow but sounds amazing!)
To use OpenAI TTS, add: --openai <your API key> (Use speaker option to specify voice other than onyx: `--speaker shimmer`)
To change speaker (ex p307 for a good male voice), add: --speaker p307
To output in mp3 format instead of m4b, add: --mp3
To skip reading any links, add: --skip-links
Expand Down Expand Up @@ -136,10 +137,11 @@ def get_speaker():
if "--speaker" in sys.argv:
index = sys.argv.index("--speaker")
speaker_used = sys.argv[index + 1]
else:
if "--openai" in sys.argv:
elif "--openai" in sys.argv:
speaker_used = "onyx"
else:
elif "--xtts" in sys.argv:
speaker_used = "xtts"
else:
speaker_used = "p335"
print(f"Speaker: {speaker_used}")
return(speaker_used)
Expand All @@ -163,9 +165,8 @@ def get_chapters_epub(book, bookname):
for i in range(len(chapters)):
#strip some characters that might have caused TTS to choke
text = chap2text(chapters[i])
#this still misses a lot of special characters...
#text = text.translate({ord(c): None for c in '[]*“”"\''})
allowed_chars = string.ascii_letters + string.digits + '-,.!? '
text = text.replace("—", ", ")
allowed_chars = string.ascii_letters + string.digits + "-,.!? '"
text = ''.join(c for c in text if c in allowed_chars)
if len(text) < 150:
#too short to bother with
Expand Down Expand Up @@ -244,6 +245,12 @@ def combine_sentences(sentences, length=3500):
yield combined

def main():
if "--xtts" in sys.argv:
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
index = sys.argv.index("--xtts")
speaker_wav = sys.argv[index + 1]
else:
model_name = "tts_models/en/vctk/vits"
bookname = get_bookname() #detect .txt, .epub or https
booktype = bookname.split('.')[-1]
speaker_used = get_speaker()
Expand Down Expand Up @@ -318,7 +325,25 @@ def main():
for f in tempfiles:
os.remove(f)
else:
tts.tts_to_file(text = chapters_to_read[i], speaker = speaker_used, file_path = outputwav)
if "--xtts" in sys.argv:
#look at all this disgusting duplicated code! FIX IT!!!
tempfiles = []
segmenter = pysbd.Segmenter(language="en", clean=True)
sentences = segmenter.segment(chapters_to_read[i])
sentence_groups = list(combine_sentences(sentences, 1000))
for x in range(len(sentence_groups)):
tempwav = "temp" + str(x) + ".wav"
tts.tts_to_file(text=sentence_groups[x], speaker_wav = speaker_wav, file_path=tempwav, language="en")
tempfiles.append(tempwav)
tempwavfiles = [AudioSegment.from_mp3(f"{f}") for f in tempfiles]
concatenated = sum(tempwavfiles)
concatenated.export(outputwav, format="wav")
for f in tempfiles:
os.remove(f)

else:
tts.tts_to_file(text = chapters_to_read[i], speaker = speaker_used, file_path = outputwav)


files.append(outputwav)
position += len(chapters_to_read[i])
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
author_email='[email protected]',
url='https://github.com/aedocw/epub2tts',
license='Apache License, Version 2.0',
version='1.4.0',
version='1.5.0',
packages=find_packages(),
install_requires=requirements,
py_modules=['epub2tts'],
Expand Down

0 comments on commit d1b8908

Please sign in to comment.