TTS: Added speaking rate, pitch & languageCode

vishen · Jan 22, 2021 · 9d853ad
1 parent 4a4e399
commit 9d853ad
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 17 deletions.
diff --git a/README.md b/README.md
@@ -33,8 +33,10 @@ device uuid, `-u <uuid>`, the results will be cached and it will connect to the
 ## Installing
 
 ### Install release binaries
+
 https://github.com/vishen/go-chromecast/releases
-* If using Linux: Download the latest release, unzip using `tar -xzf go-chromecast.tar.gz`, and install using `sudo install ./go-chromecast /usr/bin/`
+
+- If using Linux: Download the latest release, unzip using `tar -xzf go-chromecast.tar.gz`, and install using `sudo install ./go-chromecast /usr/bin/`
 
 ### Install the usual Go way:
 
@@ -96,8 +98,8 @@ Flags:
 Use "go-chromecast [command] --help" for more information about a command.
 ```
 
-
 ## Usage
+
 ```
 # View available cast devices.
 $ go-chromecast ls
@@ -203,13 +205,14 @@ $ go-chromecast ui
 ![User-interface example](go-chromecast-ui.png "User-interface example")
 
 A basic terminal user-interface is provided, that supports the following controls:
-* Quit: "q"
-* Play/Pause: SPACE
-* Volume: - / +
-* Mute/Unmute: "m"
-* Seek (15s): <- / ->
-* Previous/Next: PgUp / PgDn
-* Stop: "s"
+
+- Quit: "q"
+- Play/Pause: SPACE
+- Volume: - / +
+- Mute/Unmute: "m"
+- Seek (15s): <- / ->
+- Previous/Next: PgUp / PgDn
+- Stop: "s"
 
 It can be run in the following ways:
 
@@ -311,5 +314,7 @@ For non en-US languages
 
 ```
 $ go-chromecast tts <message_to_say> --google-service-account=/path/to/service/account.json \
-  --voice-name en-US-Wavenet-G
+  --voice-name en-US-Wavenet-G --speaking-rate 1.05 --pitch 0.9
 ```
+
+A list of available voices (values for `--voice-name`) can be found here: https://cloud.google.com/text-to-speech/
diff --git a/cmd/tts.go b/cmd/tts.go
@@ -39,8 +39,11 @@ var ttsCmd = &cobra.Command{
 			fmt.Printf("--google-service-account is required\n")
 			return
 		}
-
+
+		languageCode, _ := cmd.Flags().GetString("language-code")
 		voiceName, _ := cmd.Flags().GetString("voice-name")
+		speakingRate, _ := cmd.Flags().GetFloat32("speaking-rate") 
+		pitch, _ := cmd.Flags().GetFloat32("pitch") 
 
 		b, err := ioutil.ReadFile(googleServiceAccount)
 		if err != nil {
@@ -54,7 +57,7 @@ var ttsCmd = &cobra.Command{
 			return
 		}
 
-		data, err := tts.Create(args[0], b, voiceName)
+		data, err := tts.Create(args[0], b, languageCode, voiceName, speakingRate, pitch)
 		if err != nil {
 			fmt.Printf("%v\n", err)
 			return
@@ -88,5 +91,8 @@ var ttsCmd = &cobra.Command{
 func init() {
 	rootCmd.AddCommand(ttsCmd)
 	ttsCmd.Flags().String("google-service-account", "", "google service account JSON file")
-	ttsCmd.Flags().String("voice-name", "en-US", "text-to-speech Voice (en-US-Wavenet-G, pl-PL-Wavenet-A, pl-PL-Wavenet-B, de-DE-Wavenet-A)")
+	ttsCmd.Flags().String("language-code", "en-US", "text-to-speech Language Code (de-DE, ja-JP,...)")
+	ttsCmd.Flags().String("voice-name", "en-US-Wavenet-G", "text-to-speech Voice (en-US-Wavenet-G, pl-PL-Wavenet-A, pl-PL-Wavenet-B, de-DE-Wavenet-A)")
+	ttsCmd.Flags().Float32("speaking-rate", 1.0, "speaking rate")
+	ttsCmd.Flags().Float32("pitch", 1.0, "pitch")
 }
diff --git a/tts/tts.go b/tts/tts.go
@@ -15,7 +15,7 @@ const (
 	timeout = time.Second * 10
 )
 
-func Create(sentence string, serviceAccountKey []byte, voiceName string) ([]byte, error) {
+func Create(sentence string, serviceAccountKey []byte, languageCode string, voiceName string, speakingRate float32, pitch float32) ([]byte, error) {
 	ctx, cancel := context.WithTimeout(context.Background(), timeout)
 	defer cancel()
 
@@ -29,12 +29,14 @@ func Create(sentence string, serviceAccountKey []byte, voiceName string) ([]byte
 			InputSource: &texttospeechpb.SynthesisInput_Text{Text: sentence},
 		},
 		Voice: &texttospeechpb.VoiceSelectionParams{
-			Name: voiceName,
+			LanguageCode: languageCode,
+			Name:         voiceName,
+			SsmlGender:   texttospeechpb.SsmlVoiceGender_NEUTRAL,
 		},
 		AudioConfig: &texttospeechpb.AudioConfig{
 			AudioEncoding: texttospeechpb.AudioEncoding_MP3,
-			SpeakingRate: 1.0,
-			Pitch: 1.0,
+			SpeakingRate: float64(speakingRate),
+			Pitch: float64(pitch),
 		},
 	}