forked from OrangeX4/latex2sympy
-
Notifications
You must be signed in to change notification settings - Fork 2
/
tts.py
55 lines (43 loc) · 2.2 KB
/
tts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""Synthesizes speech from the input string of text or ssml.
Make sure to be working in a virtual environment.
Note: ssml must be well-formed according to:
https://www.w3.org/TR/speech-synthesis/
"""
from google.cloud import texttospeech
from google.cloud.texttospeech_v1.types import SynthesizeSpeechResponse
# NOTE: We need the GOOGLE_APPLICATION_CREDENTIALS environment variable to be set
# When running this file manually (to test the TTS server), uncomment the lines below, making sure ou have `service_account.json` in the same directory as this file
# While running the server, the environment variable is set automatically by Cloud Run
# When running locally from the server, the environment variable is set automatically by the server
# import os
# # Inside Cloud Run, the service account key is stored in the environment variable automatically
# # but locally, we need to set it manually
# if os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') is None:
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "service_account_key.json"
def generate_mp3(words: str) -> bytes:
# Instantiates a client
client = texttospeech.TextToSpeechClient()
print(f' -> Text to speech: reading {words}')
synthesis_input = texttospeech.SynthesisInput(text=words)
# Build the voice request, select the language code ("en-US") and the ssml
# voice gender ("neutral")
voice = texttospeech.VoiceSelectionParams(
language_code="en-US", name="en-US-Neural2-D"
)
# Select the type of audio file you want returned
audio_config = texttospeech.AudioConfig(
audio_encoding=texttospeech.AudioEncoding.MP3
)
# Perform the text-to-speech request on the text input with the selected
# voice parameters and audio file type
response = client.synthesize_speech(
input=synthesis_input, voice=voice, audio_config=audio_config
)
print(f' -> Text to speech: done')
# The response's audio_content is binary.
return bytes(response.audio_content)
# filename = "output.mp3"
# with open(filename, "wb") as out:
# # Write the response to the output file.
# out.write(response.audio_content)
# print('Audio content written to file', filename)