-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added functionality for using pyttsx3 instead of coqui-ai TTS #33
base: dev
Are you sure you want to change the base?
Changes from 1 commit
6bf6c26
a8e9a65
f13abf1
c9e0b47
ff8bc70
00e17c0
3b58279
7477680
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,53 +3,125 @@ | |
import numpy as np | ||
import strauss.utilities as utils | ||
import re | ||
|
||
try: | ||
from TTS.api import TTS | ||
ttsMode = 'coqui-TTS' | ||
except (OSError, ModuleNotFoundError) as sderr: | ||
def TTS(*args, **kwargs): | ||
raise TTSIsNotSupported("strauss has not been installed with text-to-speech support. \n" | ||
"This is not installed by default, due to some specific module requirements of the TTS module." | ||
"Reinstalling strauss with 'pip install strauss[TTS]' will give you access to this function") | ||
|
||
print('Coqui TTS not found. Trying to import pyttsx3...') | ||
try: | ||
import pyttsx3 | ||
ttsMode = 'pyttsx3' | ||
print('pyttsx3 has been successfully imported.') | ||
except (OSError, ModuleNotFoundError) as sderr: | ||
ttsMode = 'None' | ||
print('No supported text-to-speech packages have been found.') | ||
def TTS(*args, **kwargs): | ||
raise TTSIsNotSupported("strauss has not been installed with text-to-speech support. \n" | ||
"This is not installed by default, due to some specific module requirements of the TTS module.\n" | ||
"Reinstalling strauss with 'pip install strauss[TTS]' will give you access to this function\n" | ||
"If you run into issues with the TTS package, you can also install pyttsx3 with the command\n" | ||
"'pip install pyttsx3'.") | ||
|
||
class TTSIsNotSupported(Exception): | ||
pass | ||
|
||
def get_ttsMode(): | ||
return ttsMode | ||
|
||
def getVoices(info=False): | ||
'''Get available voices for text-to-speech. | ||
|
||
When info=True, this prints out information | ||
for each voice option. | ||
|
||
Args: | ||
info (:obj:`bool`): Print out voice information when True, | ||
by default False | ||
|
||
Returns: | ||
voices (:obj:`list`): List of ``pyttsx3.voice.Voice`` objects | ||
''' | ||
if ttsMode == 'pyttsx3': | ||
engine = pyttsx3.init() | ||
voices = engine.getProperty('voices') | ||
if info==True: | ||
print('Text-to-speech voice options') | ||
for ind in range(len(voices)): | ||
voiceProps = vars(voices[ind]) | ||
print('\nVoice index:', ind) | ||
for key in voiceProps.keys(): | ||
print('{}: {}'.format(key, voiceProps[key])) | ||
else: | ||
pass | ||
return voices | ||
|
||
def render_caption(caption, samprate, model, caption_path): | ||
'''The render_caption function generates an audio caption from text input | ||
and writes it as a wav file. If the sample rate of the model is not equal | ||
to that passed from sonification.py, it resamples to the correct rate and | ||
re-writes the file. Text from user input is converted with text-to-speech | ||
software from Coqui-AI - https://pypi.org/project/TTS/ . You can view | ||
publicly available voice models with 'TTS.list_models()' | ||
re-writes the file. | ||
|
||
If Coqui-AI is installed, text from user input is converted with text-to- | ||
speech software from Coqui-AI - https://pypi.org/project/TTS/ . | ||
You can view publicly available voice models with 'TTS.list_models()' | ||
|
||
If Coqui-AI is not installed but pyttsx3 (https://pypi.org/project/pyttsx3/) | ||
is installed, text from user input is converted offline using pyttsx3. | ||
|
||
Note: | ||
STRAUSS checks if Coqui-AI is available. If it is, ``ttsMode`` is set to | ||
``coqui-TTS``. If it is unavailable, STRAUSS checks whether pyttsx3 is | ||
available. If it is, ``ttsMode`` is set to ``pyttsx3``; if neither package is | ||
available, ``ttsMode`` is set to the string ``None``. | ||
|
||
Args: | ||
caption (:obj:`str`): script to be spoken by the TTS voice | ||
samprate (:obj:`int`): samples per second | ||
model (:obj:`str`): valid name of TTS voice from the underlying TTS | ||
module | ||
model (:obj:`str`): valid name of TTS voice from the underlying TTS | ||
module | ||
model (:obj:`str` for Coqui-AI; :obj:`dict` for pyttsx3): for Coqui-AI: | ||
valid name of TTS voice from the underlying TTS module; for pyttsx3: | ||
dictionary with keys of 'rate' (percent of speed), 'volume' (float from 0 to 1), | ||
and/or 'voices'; any key that is omitted is left at the pyttsx3 default | ||
caption_path (:obj:`str`): filepath for spoken caption output | ||
''' | ||
|
||
# TODO: do this better with logging. We can filter TTS function output, e.g. alert to downloading models... | ||
print('Rendering caption (this can take a while if the caption is long, or if the TTS model needs downloading)...') | ||
if ttsMode == 'coqui-TTS': | ||
# TODO: do this better with logging. We can filter TTS function output, e.g. alert to downloading models... | ||
print('Rendering caption (this can take a while if the caption is long, or if the TTS model needs downloading)...') | ||
|
||
# capture stdout from the talkative TTS module | ||
with utils.Capturing() as output: | ||
# Load in the tts model | ||
tts = TTS(model, progress_bar=False, gpu=False) | ||
|
||
# render to speech, and write as a wav file (allow ) | ||
tts.tts_to_file(text=caption, file_path=caption_path) | ||
|
||
# capture stdout from the talkative TTS module | ||
with utils.Capturing() as output: | ||
# Load in the tts model | ||
tts = TTS(model, progress_bar=False, gpu=False) | ||
elif ttsMode == 'pyttsx3': | ||
print('Rendering caption (this can take a while if the caption is long)...') | ||
|
||
# capture stdout from the talkative TTS module | ||
with utils.Capturing() as output: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. seems like |
||
# Setup voice model for pyttsx3 | ||
engine = pyttsx3.init() # initialize object | ||
|
||
# render to speech, and write as a wav file (allow ) | ||
tts.tts_to_file(text=caption, file_path=caption_path) | ||
# check what model info was set; if none were | ||
# specified, use defaults | ||
for key in ['rate','volume','voices']: | ||
if key in model.keys(): | ||
engine.setProperty(key, model[key]) | ||
else: | ||
pass | ||
|
||
|
||
# render to speech, and write as a wav file (allow ) | ||
engine.save_to_file(text=caption, filename=caption_path) | ||
engine.runAndWait() | ||
|
||
# Read the file back in to check the sample rate | ||
rate_in, wavobj = wavfile.read(caption_path) | ||
|
||
#If it doesn't match the required rate, resample and re-write | ||
if rate_in != samprate: | ||
new_wavobj = utils.resample(rate_in, samprate, wavobj) | ||
wavfile.write(caption_path, samprate, new_wavobj) | ||
else: | ||
TTS() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can move this to L116 so that it works correctly in the case where no TTS modules are installed |
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can comment out these prints for now - we want to implement logging / debug text at some point, but we are generally trying to keep output quiet unless needed.