Skip to content

Commit

Permalink
TTS: Added speaking rate, pitch & languageCode
Browse files Browse the repository at this point in the history
  • Loading branch information
bartek-marek authored and vishen committed Jan 22, 2021
1 parent 4a4e399 commit 9d853ad
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 17 deletions.
25 changes: 15 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ device uuid, `-u <uuid>`, the results will be cached and it will connect to the
## Installing

### Install release binaries

https://github.com/vishen/go-chromecast/releases
* If using Linux: Download the latest release, unzip using `tar -xzf go-chromecast.tar.gz`, and install using `sudo install ./go-chromecast /usr/bin/`

- If using Linux: Download the latest release, unzip using `tar -xzf go-chromecast.tar.gz`, and install using `sudo install ./go-chromecast /usr/bin/`

### Install the usual Go way:

Expand Down Expand Up @@ -96,8 +98,8 @@ Flags:
Use "go-chromecast [command] --help" for more information about a command.
```


## Usage

```
# View available cast devices.
$ go-chromecast ls
Expand Down Expand Up @@ -203,13 +205,14 @@ $ go-chromecast ui
![User-interface example](go-chromecast-ui.png "User-interface example")

A basic terminal user interface is provided that supports the following controls:
* Quit: "q"
* Play/Pause: SPACE
* Volume: - / +
* Mute/Unmute: "m"
* Seek (15s): <- / ->
* Previous/Next: PgUp / PgDn
* Stop: "s"

- Quit: "q"
- Play/Pause: SPACE
- Volume: - / +
- Mute/Unmute: "m"
- Seek (15s): <- / ->
- Previous/Next: PgUp / PgDn
- Stop: "s"

It can be run in the following ways:

Expand Down Expand Up @@ -311,5 +314,7 @@ For non en-US languages

```
$ go-chromecast tts <message_to_say> --google-service-account=/path/to/service/account.json \
--voice-name en-US-Wavenet-G
--voice-name en-US-Wavenet-G --speaking-rate 1.05 --pitch 0.9
```

The list of available voices (`--voice-name`) can be found here: https://cloud.google.com/text-to-speech/
12 changes: 9 additions & 3 deletions cmd/tts.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,11 @@ var ttsCmd = &cobra.Command{
fmt.Printf("--google-service-account is required\n")
return
}


languageCode, _ := cmd.Flags().GetString("language-code")
voiceName, _ := cmd.Flags().GetString("voice-name")
speakingRate, _ := cmd.Flags().GetFloat32("speaking-rate")
pitch, _ := cmd.Flags().GetFloat32("pitch")

b, err := ioutil.ReadFile(googleServiceAccount)
if err != nil {
Expand All @@ -54,7 +57,7 @@ var ttsCmd = &cobra.Command{
return
}

data, err := tts.Create(args[0], b, voiceName)
data, err := tts.Create(args[0], b, languageCode, voiceName, speakingRate, pitch)
if err != nil {
fmt.Printf("%v\n", err)
return
Expand Down Expand Up @@ -88,5 +91,8 @@ var ttsCmd = &cobra.Command{
func init() {
rootCmd.AddCommand(ttsCmd)
ttsCmd.Flags().String("google-service-account", "", "google service account JSON file")
ttsCmd.Flags().String("voice-name", "en-US", "text-to-speech Voice (en-US-Wavenet-G, pl-PL-Wavenet-A, pl-PL-Wavenet-B, de-DE-Wavenet-A)")
ttsCmd.Flags().String("language-code", "en-US", "text-to-speech Language Code (de-DE, ja-JP,...)")
ttsCmd.Flags().String("voice-name", "en-US-Wavenet-G", "text-to-speech Voice (en-US-Wavenet-G, pl-PL-Wavenet-A, pl-PL-Wavenet-B, de-DE-Wavenet-A)")
ttsCmd.Flags().Float32("speaking-rate", 1.0, "speaking rate")
ttsCmd.Flags().Float32("pitch", 1.0, "pitch")
}
10 changes: 6 additions & 4 deletions tts/tts.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ const (
timeout = time.Second * 10
)

func Create(sentence string, serviceAccountKey []byte, voiceName string) ([]byte, error) {
func Create(sentence string, serviceAccountKey []byte, languageCode string, voiceName string, speakingRate float32, pitch float32) ([]byte, error) {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()

Expand All @@ -29,12 +29,14 @@ func Create(sentence string, serviceAccountKey []byte, voiceName string) ([]byte
InputSource: &texttospeechpb.SynthesisInput_Text{Text: sentence},
},
Voice: &texttospeechpb.VoiceSelectionParams{
Name: voiceName,
LanguageCode: languageCode,
Name: voiceName,
SsmlGender: texttospeechpb.SsmlVoiceGender_NEUTRAL,
},
AudioConfig: &texttospeechpb.AudioConfig{
AudioEncoding: texttospeechpb.AudioEncoding_MP3,
SpeakingRate: 1.0,
Pitch: 1.0,
SpeakingRate: float64(speakingRate),
Pitch: float64(pitch),
},
}

Expand Down

0 comments on commit 9d853ad

Please sign in to comment.