Skip to content

Commit

Permalink
Support SSML in Text-to-speech (#192)
Browse files Browse the repository at this point in the history
Add an `--ssml` flag to the `tts` command.
  • Loading branch information
yteraoka authored Jul 11, 2024
1 parent fa77055 commit c6687c8
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 5 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -327,3 +327,11 @@ $ go-chromecast tts <message_to_say> --google-service-account=/path/to/service/a
```

List of available voices (voice-name) can be found here: https://cloud.google.com/text-to-speech/

To pass the message as [SSML](https://cloud.google.com/text-to-speech/docs/ssml) instead of plain text, add the `--ssml` flag:

```
$ go-chromecast tts '<speak>Hello<break time="500ms"/>world.</speak>' \
--google-service-account=/path/to/service/account.json \
--ssml
```
4 changes: 3 additions & 1 deletion cmd/tts.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ var ttsCmd = &cobra.Command{
voiceName, _ := cmd.Flags().GetString("voice-name")
speakingRate, _ := cmd.Flags().GetFloat32("speaking-rate")
pitch, _ := cmd.Flags().GetFloat32("pitch")
ssml, _ := cmd.Flags().GetBool("ssml")

b, err := ioutil.ReadFile(googleServiceAccount)
if err != nil {
Expand All @@ -54,7 +55,7 @@ var ttsCmd = &cobra.Command{
exit("unable to get cast application: %v", err)
}

data, err := tts.Create(args[0], b, languageCode, voiceName, speakingRate, pitch)
data, err := tts.Create(args[0], b, languageCode, voiceName, speakingRate, pitch, ssml)
if err != nil {
exit("unable to create tts: %v", err)
}
Expand Down Expand Up @@ -85,4 +86,5 @@ func init() {
ttsCmd.Flags().String("voice-name", "en-US-Wavenet-G", "text-to-speech Voice (en-US-Wavenet-G, pl-PL-Wavenet-A, pl-PL-Wavenet-B, de-DE-Wavenet-A)")
ttsCmd.Flags().Float32("speaking-rate", 1.0, "speaking rate")
ttsCmd.Flags().Float32("pitch", 1.0, "pitch")
ttsCmd.Flags().Bool("ssml", false, "use SSML")
}
13 changes: 9 additions & 4 deletions tts/tts.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ const (
timeout = time.Second * 10
)

func Create(sentence string, serviceAccountKey []byte, languageCode string, voiceName string, speakingRate float32, pitch float32) ([]byte, error) {
func Create(sentence string, serviceAccountKey []byte, languageCode string, voiceName string, speakingRate float32, pitch float32, ssml bool) ([]byte, error) {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()

Expand All @@ -24,10 +24,15 @@ func Create(sentence string, serviceAccountKey []byte, languageCode string, voic
return nil, errors.Wrap(err, "unable to create texttospeech client")
}

input := texttospeechpb.SynthesisInput{}
if ssml {
input.InputSource = &texttospeechpb.SynthesisInput_Ssml{Ssml: sentence}
} else {
input.InputSource = &texttospeechpb.SynthesisInput_Text{Text: sentence}
}

req := texttospeechpb.SynthesizeSpeechRequest{
Input: &texttospeechpb.SynthesisInput{
InputSource: &texttospeechpb.SynthesisInput_Text{Text: sentence},
},
Input: &input,
Voice: &texttospeechpb.VoiceSelectionParams{
LanguageCode: languageCode,
Name: voiceName,
Expand Down

0 comments on commit c6687c8

Please sign in to comment.