Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added voices commands config #330

Merged
merged 2 commits into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions src/google-cloud/gcTextToSpeechClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ import type { CredentialBody } from 'google-auth-library/build/src/auth/credenti

export interface TextToSpeechParams {
text: string
languageCode: 'en-US' | 'yue-Hant-HK' | 'ru-RU' | 'cmn-CN' | 'es-ES'
gender: 'MALE' | 'FEMALE'
languageCode: string
ssmlGender?: 'MALE' | 'FEMALE'
voiceName?: string
}

class GcTextToSpeechClient {
Expand All @@ -14,12 +15,21 @@ class GcTextToSpeechClient {
this._client = new GcTextToSpeech.TextToSpeechClient({ credentials })
}

async textToSpeech ({ text, languageCode, gender }: TextToSpeechParams): Promise<string | Uint8Array | null | undefined> {
async ssmlTextToSpeech ({ text, languageCode, ssmlGender, voiceName }: TextToSpeechParams): Promise<string | Uint8Array | null | undefined> {
const ssml = `<speak>${text}</speak>`

const [response] = await this._client.synthesizeSpeech({
input: { ssml },
voice: { languageCode, ssmlGender: gender },
voice: { languageCode, ssmlGender, name: voiceName },
audioConfig: { audioEncoding: 'OGG_OPUS' }
})

return response.audioContent
}

async textToSpeech ({ text, languageCode, voiceName }: TextToSpeechParams): Promise<string | Uint8Array | null | undefined> {
const [response] = await this._client.synthesizeSpeech({
input: { text },
voice: { languageCode, name: voiceName },
audioConfig: { audioEncoding: 'OGG_OPUS' }
})

Expand Down
164 changes: 164 additions & 0 deletions src/modules/text-to-speech/commandConfigList.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import type { TextToSpeechParams } from '../../google-cloud/gcTextToSpeechClient'

/**
 * One entry of the voice-command table: a bot command name paired with the
 * text-to-speech parameters used when that command is invoked.
 */
interface CommandConfigItem {
/** Command name exactly as compared by `getConfigByCommand` (entries in this file carry no leading slash). */
command: string
/** Text-to-speech parameters for the command; `text` is omitted because it is supplied per message. */
gcParams: Omit<TextToSpeechParams, 'text'>
}

/**
 * Table of voice commands and the text-to-speech parameters each one uses.
 *
 * Command names follow the pattern `v` + two-letter language tag + `m`/`f`
 * (presumably male/female voice — confirm against the provider's voice list).
 * Every entry's `languageCode` is expected to match the locale of its
 * `voiceName`.
 */
export const commandConfigList: CommandConfigItem[] = [
  // English
  {
    command: 'venm',
    gcParams: {
      languageCode: 'en-US',
      voiceName: 'en-US-Neural2-I'
    }
  },
  {
    command: 'venf',
    gcParams: {
      languageCode: 'en-US',
      voiceName: 'en-US-Neural2-F'
    }
  },
  // Mandarin (Chinese)
  {
    command: 'vcnm',
    gcParams: {
      languageCode: 'cmn-CN',
      voiceName: 'cmn-CN-Wavenet-B'
    }
  },
  {
    command: 'vcnf',
    gcParams: {
      languageCode: 'cmn-CN',
      voiceName: 'cmn-CN-Wavenet-A'
    }
  },
  // Cantonese (Chinese)
  {
    command: 'vhkm',
    gcParams: {
      languageCode: 'yue-Hant-HK',
      voiceName: 'yue-HK-Standard-B'
    }
  },
  {
    command: 'vhkf',
    gcParams: {
      // Was 'en-US', which contradicts the Cantonese voice below; aligned
      // with the sibling 'vhkm' entry.
      languageCode: 'yue-Hant-HK',
      voiceName: 'yue-HK-Standard-A'
    }
  },
  // German
  {
    command: 'vdem',
    gcParams: {
      languageCode: 'de-DE',
      voiceName: 'de-DE-Neural2-B'
    }
  },
  {
    command: 'vdef',
    gcParams: {
      languageCode: 'de-DE',
      voiceName: 'de-DE-Neural2-C'
    }
  },
  // Spanish
  {
    command: 'vesm',
    gcParams: {
      languageCode: 'es-ES',
      voiceName: 'es-ES-Neural2-B'
    }
  },
  {
    command: 'vesf',
    gcParams: {
      languageCode: 'es-ES',
      voiceName: 'es-ES-Neural2-A'
    }
  },
  // Russian
  {
    command: 'vrum',
    gcParams: {
      languageCode: 'ru-RU',
      voiceName: 'ru-RU-Wavenet-B'
    }
  },
  {
    command: 'vruf',
    gcParams: {
      languageCode: 'ru-RU',
      voiceName: 'ru-RU-Wavenet-A'
    }
  },
  // Indonesian
  {
    command: 'vidm',
    gcParams: {
      languageCode: 'id-ID',
      voiceName: 'id-ID-Wavenet-B'
    }
  },
  {
    command: 'vidf',
    gcParams: {
      languageCode: 'id-ID',
      voiceName: 'id-ID-Wavenet-A'
    }
  },
  // Korean
  {
    command: 'vkom',
    gcParams: {
      languageCode: 'ko-KR',
      voiceName: 'ko-KR-Wavenet-C'
    }
  },
  {
    command: 'vkof',
    gcParams: {
      languageCode: 'ko-KR',
      voiceName: 'ko-KR-Wavenet-A'
    }
  },
  // Japanese
  {
    command: 'vjam',
    gcParams: {
      languageCode: 'ja-JP',
      voiceName: 'ja-JP-Wavenet-C'
    }
  },
  {
    command: 'vjaf',
    gcParams: {
      languageCode: 'ja-JP',
      voiceName: 'ja-JP-Wavenet-B'
    }
  },
  // Portuguese
  {
    command: 'vptm',
    gcParams: {
      languageCode: 'pt-PT',
      voiceName: 'pt-PT-Wavenet-C'
    }
  },
  {
    command: 'vptf',
    gcParams: {
      languageCode: 'pt-PT',
      voiceName: 'pt-PT-Wavenet-A'
    }
  }
]

/**
 * Collects the command name of every entry in `commandConfigList`, in table order.
 *
 * @returns A new array of command names.
 */
export function getCommandList (): string[] {
  const commands: string[] = []
  for (const { command } of commandConfigList) {
    commands.push(command)
  }
  return commands
}
/**
 * Looks up the configuration entry whose `command` is strictly equal to the given name.
 *
 * @param command - Command name to search for.
 * @returns The first matching entry, or `undefined` when no entry matches.
 */
export function getConfigByCommand (command: string): CommandConfigItem | undefined {
  for (const entry of commandConfigList) {
    if (entry.command === command) {
      return entry
    }
  }
  return undefined
}
88 changes: 60 additions & 28 deletions src/modules/text-to-speech/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import type { Logger } from 'pino'
import type { BotPayments } from '../payment'
import type { OnMessageContext, PayableBot } from '../types'
import { gcTextToSpeedClient, type TextToSpeechParams } from '../../google-cloud/gcTextToSpeechClient'
import { getCommandList, getConfigByCommand } from './commandConfigList'

enum SupportedCommands {
VOICE = 'voice',
Expand Down Expand Up @@ -32,7 +33,7 @@ export class TextToSpeechBot implements PayableBot {
}

public isSupportedEvent (ctx: OnMessageContext): boolean {
return ctx.hasCommand(Object.values(SupportedCommands))
return ctx.hasCommand(Object.values(SupportedCommands)) || ctx.hasCommand(getCommandList())
}

public getEstimatedPrice (ctx: OnMessageContext): number {
Expand All @@ -51,45 +52,71 @@ export class TextToSpeechBot implements PayableBot {
public async onEvent (ctx: OnMessageContext): Promise<void> {
if (ctx.hasCommand(SupportedCommands.VOICE)) {
const text = this.getTextFromMessage(ctx)
await this.onTextToSpeech(ctx, { text, gender: 'MALE', languageCode: 'en-US' })
await this.onTextToSpeech(ctx, { text, ssmlGender: 'MALE', languageCode: 'en-US' })
return
}

if (ctx.hasCommand(SupportedCommands.VOICEHK)) {
const text = this.getTextFromMessage(ctx)
await this.onTextToSpeech(ctx, { text, gender: 'MALE', languageCode: 'yue-Hant-HK' })
return
}
// if (ctx.hasCommand(SupportedCommands.VOICEHK)) {
// const text = this.getTextFromMessage(ctx)
// await this.onTextToSpeech(ctx, { text, ssmlGender: 'MALE', languageCode: 'yue-Hant-HK' })
// return
// }

// if (ctx.hasCommand(SupportedCommands.VOICEHKF)) {
// const text = this.getTextFromMessage(ctx)
// await this.onTextToSpeech(ctx, { text, ssmlGender: 'FEMALE', languageCode: 'yue-Hant-HK' })
// return
// }

// if (ctx.hasCommand(SupportedCommands.VOICERU)) {
// const text = this.getTextFromMessage(ctx)
// await this.onTextToSpeech(ctx, { text, ssmlGender: 'FEMALE', languageCode: 'ru-RU' })
// return
// }

// if (ctx.hasCommand(SupportedCommands.VOICERU)) {
// const text = this.getTextFromMessage(ctx)
// await this.onTextToSpeech(ctx, { text, ssmlGender: 'FEMALE', languageCode: 'ru-RU' })
// }

// if (ctx.hasCommand(SupportedCommands.VOICECN)) {
// const text = this.getTextFromMessage(ctx)
// await this.onTextToSpeech(ctx, { text, ssmlGender: 'MALE', languageCode: 'cmn-CN' })
// }

// if (ctx.hasCommand(SupportedCommands.VOICEES)) {
// const text = this.getTextFromMessage(ctx)
// await this.onTextToSpeech(ctx, { text, ssmlGender: 'MALE', languageCode: 'es-ES' })
// }

if (ctx.hasCommand(getCommandList())) {
const rawCommand = ctx.entities().find(item => item.type === 'bot_command' && item.offset === 0)
if (!rawCommand) {
await ctx.reply('Unexpected error')
return
}

if (ctx.hasCommand(SupportedCommands.VOICEHKF)) {
const text = this.getTextFromMessage(ctx)
await this.onTextToSpeech(ctx, { text, gender: 'FEMALE', languageCode: 'yue-Hant-HK' })
return
}
const command = rawCommand.text.replace('/', '') ?? null

if (ctx.hasCommand(SupportedCommands.VOICERU)) {
const text = this.getTextFromMessage(ctx)
await this.onTextToSpeech(ctx, { text, gender: 'FEMALE', languageCode: 'ru-RU' })
}
if (!command) {
await ctx.reply('I cannot extract the command from the text')
return
}

if (ctx.hasCommand(SupportedCommands.VOICERU)) {
const text = this.getTextFromMessage(ctx)
await this.onTextToSpeech(ctx, { text, gender: 'FEMALE', languageCode: 'ru-RU' })
}
const config = getConfigByCommand(command)

if (ctx.hasCommand(SupportedCommands.VOICECN)) {
const text = this.getTextFromMessage(ctx)
await this.onTextToSpeech(ctx, { text, gender: 'MALE', languageCode: 'cmn-CN' })
}
if (!config) {
await ctx.reply('There is no configuration available for this command')
return
}

if (ctx.hasCommand(SupportedCommands.VOICEES)) {
const text = this.getTextFromMessage(ctx)
await this.onTextToSpeech(ctx, { text, gender: 'MALE', languageCode: 'es-ES' })
await this.onTextToSpeech(ctx, { text, ...config.gcParams })
}
}

public async onTextToSpeech (ctx: OnMessageContext, params: TextToSpeechParams): Promise<void> {
const { text, gender, languageCode } = params
const { text, ssmlGender, languageCode, voiceName } = params

if (!params.text) {
await ctx.reply('/voice command should contain text.')
Expand All @@ -102,7 +129,12 @@ export class TextToSpeechBot implements PayableBot {

const progressMessage = await ctx.reply('Generating...')

const voiceResult = await gcTextToSpeedClient.textToSpeech({ text, gender, languageCode })
let voiceResult
if (ssmlGender) { // to support genders for old commands
voiceResult = await gcTextToSpeedClient.ssmlTextToSpeech({ text, ssmlGender, languageCode, voiceName })
} else {
voiceResult = await gcTextToSpeedClient.textToSpeech({ text, ssmlGender, languageCode, voiceName })
}

if (!voiceResult) {
await ctx.api.editMessageText(ctx.chat.id, progressMessage.message_id, 'An error occurred during the process of generating the message.')
Expand Down
2 changes: 1 addition & 1 deletion src/modules/voice-translate/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ export class VoiceTranslateBot implements PayableBot {

const translateResult = await translator.translateText(resultText, null, 'en-US')

const voiceResult = await gcTextToSpeedClient.textToSpeech({ text: translateResult.text, gender: 'MALE', languageCode: 'en-US' })
const voiceResult = await gcTextToSpeedClient.ssmlTextToSpeech({ text: translateResult.text, ssmlGender: 'MALE', languageCode: 'en-US' })

if (!voiceResult) {
await ctx.reply('voice generation error')
Expand Down