Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added functionality for using pyttsx3 instead of coqui-ai TTS #33

Open
wants to merge 8 commits into
base: dev
Choose a base branch
from
24 changes: 19 additions & 5 deletions src/strauss/sonification.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from .stream import Stream
from .channels import audio_channels
from .utilities import const_or_evo, nested_dict_idx_reassign, NoSoundDevice
from .tts_caption import render_caption
from .tts_caption import render_caption, get_ttsMode
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
Expand Down Expand Up @@ -65,13 +65,13 @@ class Sonification:
"""
def __init__(self, score, sources, generator, audio_setup='stereo',
caption=None, samprate=48000,
ttsmodel=Path('tts_models','en','jenny', 'jenny')):
ttsmodel=None):

# sampling rate in Hz
self.samprate = samprate

# tts model name
self.ttsmodel = str(ttsmodel)
self.ttsmodel = ttsmodel

# caption
self.caption = caption
Expand Down Expand Up @@ -185,11 +185,25 @@ def render(self, downsamp=1):

# produce mono audio of caption, if one is provided
if str(self.caption or '').strip():
ttsMode = get_ttsMode() # determine if using coqui-ai or pyttsx3

# use a temporary directory to ensure caption file cleanup
with tempfile.TemporaryDirectory() as cdir:
cpath = Path(cdir, 'caption.wav')
render_caption(self.caption, self.samprate,
self.ttsmodel, cpath)
if ttsMode == 'coqui-ai':
if self.ttsmodel == None:
self.ttsmodel = Path('tts_models','en','jenny', 'jenny')
else:
pass
render_caption(self.caption, self.samprate,
str(self.ttsmodel), cpath)
else:
if self.ttsmodel == None:
self.ttsmodel = {}
else:
pass
render_caption(self.caption, self.samprate,
self.ttsmodel, str(cpath))
rate_in, wavobj = wavfile.read(cpath)
wavobj = np.array(wavobj)
# Set up the Stream objects for TTS
Expand Down
114 changes: 93 additions & 21 deletions src/strauss/tts_caption.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,53 +3,125 @@
import numpy as np
import strauss.utilities as utils
import re

try:
from TTS.api import TTS
ttsMode = 'coqui-TTS'
except (OSError, ModuleNotFoundError) as sderr:
def TTS(*args, **kwargs):
raise TTSIsNotSupported("strauss has not been installed with text-to-speech support. \n"
"This is not installed by default, due to some specific module requirements of the TTS module."
"Reinstalling strauss with 'pip install strauss[TTS]' will give you access to this function")

print('Coqui TTS not found. Trying to import pyttsx3...')
try:
import pyttsx3
ttsMode = 'pyttsx3'
print('pyttsx3 has been successfully imported.')
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can comment out these prints for now - we want to implement logging / debug text at some point but generally trying to keep quiet unless needed

except (OSError, ModuleNotFoundError) as sderr:
ttsMode = 'None'
print('No supported text-to-speech packages have been found.')
def TTS(*args, **kwargs):
raise TTSIsNotSupported("strauss has not been installed with text-to-speech support. \n"
"This is not installed by default, due to some specific module requirements of the TTS module.\n"
"Reinstalling strauss with 'pip install strauss[TTS]' will give you access to this function\n"
"If you run into issues with the TTS package, you can also install pyttsx3 with the command\n"
"'pip install pyttsx3'.")

class TTSIsNotSupported(Exception):
    '''Error raised by the placeholder ``TTS`` callable.

    The placeholder is defined when a text-to-speech import fails, and
    raises this exception with installation instructions when called.
    '''

def get_ttsMode():
    '''Report which text-to-speech backend was selected at import time.

    Returns:
      :obj:`str`: the module-level ``ttsMode`` flag. The import logic in
      this module sets it to ``'coqui-TTS'``, ``'pyttsx3'``, or the
      string ``'None'`` when no supported backend could be imported.
    '''
    # NOTE(review): callers must compare against the exact strings above;
    # sonification.py currently tests for 'coqui-ai', which is never set
    # here - confirm and align the two spellings.
    return ttsMode

def getVoices(info=False):
    '''Get available voices for text-to-speech.

    Voice listing is only implemented for the pyttsx3 backend. When
    ``info`` is true, the properties of each voice are printed to
    stdout alongside its index in the returned list.

    Args:
      info (:obj:`bool`): Print out voice information when True,
        by default False

    Returns:
      voices (:obj:`list`): List of ``pyttsx3.voice.Voice`` objects, or
      an empty list when the active ``ttsMode`` is not ``'pyttsx3'``.
    '''
    if ttsMode != 'pyttsx3':
        # ``voices`` used to be left unbound on this path, so the final
        # return raised UnboundLocalError for any non-pyttsx3 backend.
        return []

    engine = pyttsx3.init()
    voices = engine.getProperty('voices')

    if info:
        print('Text-to-speech voice options')
        for ind, voice in enumerate(voices):
            print('\nVoice index:', ind)
            # Voice objects are plain attribute holders, so vars() exposes
            # every property they carry.
            for key, value in vars(voice).items():
                print('{}: {}'.format(key, value))

    return voices

def render_caption(caption, samprate, model, caption_path):
'''The render_caption function generates an audio caption from text input
and writes it as a wav file. If the sample rate of the model is not equal
to that passed from sonification.py, it resamples to the correct rate and
re-writes the file. Text from user input is converted with text-to-speech
software from Coqui-AI - https://pypi.org/project/TTS/ . You can view
publicly available voice models with 'TTS.list_models()'
re-writes the file.

If Coqui-AI is installed, text from user input is converted with text-to-
speech software from Coqui-AI - https://pypi.org/project/TTS/ .
You can view publicly available voice models with 'TTS.list_models()'

If Coqui-AI is not installed but pyttsx3 (https://pypi.org/project/pyttsx3/)
is installed, text from user input is converted offline using pyttsx3.

Note:
STRAUSS checks if Coqui-AI is available. If it is, ``ttsMode`` is set to
``coqui-ai``. If it is unavailable, STRAUSS checks whether pyttsx3 is
available. If it is, ``ttsMode`` is set to ``pyttsx3``.

Args:
caption (:obj:`str`): script to be spoken by the TTS voice
samprate (:obj:`int`): samples per second
model (:obj:`str`): valid name of TTS voice from the underying TTS
module
model (:obj:`str`): valid name of TTS voice from the underying TTS
module
model (:obj:`str` for Coqui-AI; :obj:`dict` for pyttsx3): for Coqui-AI:
valid name of TTS voice from the underlying TTS module; for pyttsx3:
dictionary with keys of 'rate' (percent of speed), 'volume' (float from 0 to 1),
and/or 'voices' ()
caption_path (:obj:`str`): filepath for spoken caption output
'''

# TODO: do this better with logging. We can filter TTS function output, e.g. alert to downloading models...
print('Rendering caption (this can take a while if the caption is long, or if the TTS model needs downloading)...')
if ttsMode == 'coqui-TTS':
# TODO: do this better with logging. We can filter TTS function output, e.g. alert to downloading models...
print('Rendering caption (this can take a while if the caption is long, or if the TTS model needs downloading)...')

# capture stdout from the talkative TTS module
with utils.Capturing() as output:
# Load in the tts model
tts = TTS(model, progress_bar=False, gpu=False)

# render to speech, and write as a wav file (allow )
tts.tts_to_file(text=caption, file_path=caption_path)

# capture stdout from the talkative TTS module
with utils.Capturing() as output:
# Load in the tts model
tts = TTS(model, progress_bar=False, gpu=False)
elif ttsMode == 'pyttsx3':
print('Rendering caption (this can take a while if the caption is long)...')

# capture stdout from the talkative TTS module
with utils.Capturing() as output:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like pyttsx3 is quiet, so perhaps we don't need the capturing environment in this case.

# Setup voice model for pyttsx3
engine = pyttsx3.init() # initialize object

# render to speech, and write as a wav file (allow )
tts.tts_to_file(text=caption, file_path=caption_path)
# check what model info was set; if none were
# specified, use defaults
for key in ['rate','volume','voices']:
if key in model.keys():
engine.setProperty(key, model[key])
else:
pass


# render to speech, and write as a wav file (allow )
engine.save_to_file(text=caption, filename=caption_path)
engine.runAndWait()

# Read the file back in to check the sample rate
rate_in, wavobj = wavfile.read(caption_path)

#If it doesn't match the required rate, resample and re-write
if rate_in != samprate:
new_wavobj = utils.resample(rate_in, samprate, wavobj)
wavfile.write(caption_path, samprate, new_wavobj)
else:
TTS()
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can move this to L116 so that it works correctly in the case where no TTS modules are installed.