Skip to content

Commit

Permalink
Merge pull request #81 from aedocw/main
Browse files (browse the repository at this point in the history)
Merge for docker image build
  • Loading branch information
aedocw authored Dec 5, 2023
2 parents 600ea2d + a59030c commit 8a1c42e
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 12 deletions.
29 changes: 18 additions & 11 deletions epub2tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from pedalboard.io import AudioFile
from pydub import AudioSegment
from pydub.silence import split_on_silence
import pysbd
from nltk.tokenize import sent_tokenize
import requests
import torch, gc
import torchaudio
Expand Down Expand Up @@ -118,8 +118,8 @@ def get_chapters_epub(self):
for i in range(len(self.chapters)):
#strip some characters that might have caused TTS to choke
text = self.chap2text(self.chapters[i])
text = text.replace("—", ", ").replace("--", ", ").replace(";", ", ").replace(":", ", ").replace("''", ", ")
allowed_chars = string.ascii_letters + string.digits + "-,.!? "
text = text.replace("—", ", ").replace("--", ", ").replace(";", ", ").replace(":", ", ").replace("''", ", ").replace("-", ", ")
allowed_chars = string.ascii_letters + string.digits + "-,.!?' "
text = ''.join(c for c in text if c in allowed_chars)
if len(text) < 150:
#too short to bother with
Expand All @@ -143,8 +143,7 @@ def read_chunk_xtts(self, sentences, wav_file_path):
#takes list of sentences to read, reads through them and saves to wave file
t0 = time.time()
wav_chunks = []
segmenter = pysbd.Segmenter(language="en", clean=True)
sentence_list = segmenter.segment(sentences)
sentence_list = sent_tokenize(sentences)
for i, sentence in enumerate(sentence_list):
# Run TTS for each sentence
print(sentence) if self.debug else None
Expand Down Expand Up @@ -206,8 +205,10 @@ def read_book(self, voice_samples, engine, openai, model_name, speaker, bitrate)
self.model_name = model_name
self.openai = openai
if engine == 'xtts':
self.voice_samples = voice_samples.split(",")
voice_name = "-" + re.split('-|\d+|\.', self.voice_samples[0])[0]
self.voice_samples = []
for f in voice_samples.split(","):
self.voice_samples.append(os.path.abspath(f))
voice_name = "-" + re.split('-|\d+|\.', os.path.basename(self.voice_samples[0]))[0]
elif engine == 'openai':
if speaker == 'p335':
speaker = 'onyx'
Expand All @@ -220,6 +221,8 @@ def read_book(self, voice_samples, engine, openai, model_name, speaker, bitrate)
print("Total characters: " + str(total_chars))
if engine == "xtts":
print("Loading model: " + self.xtts_model)
#This will trigger model load even though we won't use tts object later
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(self.device)
config = XttsConfig()
model_json = self.xtts_model + "/config.json"
config.load_json(model_json)
Expand Down Expand Up @@ -251,16 +254,17 @@ def read_book(self, voice_samples, engine, openai, model_name, speaker, bitrate)
files = []
position = 0
start_time = time.time()
print("Reading from " + str(self.start) + " to " + str(self.end))
print("Reading from " + str(self.start + 1) + " to " + str(self.end))
for i in range(self.start, self.end):
outputwav = self.bookname + "-" + str(i+1) + ".wav"
if os.path.isfile(outputwav):
print(outputwav + " exists, skipping to next chapter")
else:
#print("Debug is " + str(self.debug))
tempfiles = []
segmenter = pysbd.Segmenter(language="en", clean=True)
sentences = segmenter.segment(self.chapters_to_read[i])
#segmenter = pysbd.Segmenter(language="en", clean=True)
#sentences = segmenter.segment(self.chapters_to_read[i])
sentences = sent_tokenize(self.chapters_to_read[i])
sentence_groups = list(self.combine_sentences(sentences))
for x in tqdm(range(len(sentence_groups))):
retries = 1
Expand All @@ -278,8 +282,10 @@ def read_book(self, voice_samples, engine, openai, model_name, speaker, bitrate)
elif engine == "tts":
if model_name == 'tts_models/en/vctk/vits':
#assume we're using a multi-speaker model
print(sentence_groups[x]) if self.debug else None
self.tts.tts_to_file(text = sentence_groups[x], speaker = speaker, file_path = tempwav)
else:
print(sentence_groups[x]) if self.debug else None
self.tts.tts_to_file(text = sentence_groups[x], file_path = tempwav)
ratio = self.compare(sentence_groups[x], tempwav)
if ratio < self.minratio:
Expand Down Expand Up @@ -312,6 +318,7 @@ def read_book(self, voice_samples, engine, openai, model_name, speaker, bitrate)
wav_files = [AudioSegment.from_wav(f"{f}") for f in files]
one_sec_silence = AudioSegment.silent(duration=1000)
concatenated = AudioSegment.empty()
print("Replacing silences longer than one second with one second of silence (" + str(len(wav_files)) + " files)")
for audio in wav_files:
# Split audio into chunks where detected silence is longer than one second
chunks = split_on_silence(audio, min_silence_len=1000, silence_thresh=-50)
Expand Down Expand Up @@ -343,7 +350,7 @@ def main():
description='Read an epub (or other source) to audiobook format')
parser.add_argument('sourcefile', type=str, help='The epub or text file to process')
parser.add_argument('--engine', type=str, default='tts', nargs='?', const='tts', help='Which TTS to use [tts|xtts|openai]')
parser.add_argument('--xtts', type=str, nargs='?', const="zzz", default="zzz", help='Sample wave file(s) for XTTS training separated by commas')
parser.add_argument('--xtts', type=str, nargs='?', const="zzz", default="zzz", help='Sample wave file(s) for XTTS v2 training separated by commas')
parser.add_argument('--openai', type=str, nargs='?', const="zzz", default="zzz", help='OpenAI API key if engine is OpenAI')
parser.add_argument('--model', type=str, nargs='?', const='tts_models/en/vctk/vits', default='tts_models/en/vctk/vits', help='TTS model to use, default: tts_models/en/vctk/vits')
parser.add_argument('--speaker', type=str, default='p335', nargs='?', const='p335', help='Speaker to use (ex p335 for VITS, or onyx for OpenAI)')
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ beautifulsoup4
ebooklib
fuzzywuzzy
newspaper3k
nltk
noisereduce
openai
openai-whisper
Expand Down
Binary file removed sample.mp3
Binary file not shown.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
author_email='[email protected]',
url='https://github.com/aedocw/epub2tts',
license='Apache License, Version 2.0',
version='2.0.0',
version='2.0.6',
packages=find_packages(),
install_requires=requirements,
py_modules=['epub2tts'],
Expand Down

0 comments on commit 8a1c42e

Please sign in to comment.