From 09721abb6bb15b06376ab5409adfa616eaf45709 Mon Sep 17 00:00:00 2001 From: ahiipsa Date: Tue, 3 Oct 2023 19:50:27 +0400 Subject: [PATCH 1/2] Added voices commands config --- src/google-cloud/gcTextToSpeechClient.ts | 20 ++++-- .../text-to-speech/commandConfigList.ts | 70 +++++++++++++++++++ src/modules/text-to-speech/index.ts | 52 +++++++++++--- src/modules/voice-translate/index.ts | 2 +- 4 files changed, 128 insertions(+), 16 deletions(-) create mode 100644 src/modules/text-to-speech/commandConfigList.ts diff --git a/src/google-cloud/gcTextToSpeechClient.ts b/src/google-cloud/gcTextToSpeechClient.ts index d56bb873..93819fb2 100644 --- a/src/google-cloud/gcTextToSpeechClient.ts +++ b/src/google-cloud/gcTextToSpeechClient.ts @@ -4,8 +4,9 @@ import type { CredentialBody } from 'google-auth-library/build/src/auth/credenti export interface TextToSpeechParams { text: string - languageCode: 'en-US' | 'yue-Hant-HK' | 'ru-RU' | 'cmn-CN' | 'es-ES' - gender: 'MALE' | 'FEMALE' + languageCode: 'en-US' | 'yue-Hant-HK' | 'ru-RU' | 'cmn-CN' | 'de-DE' | 'es-ES' + ssmlGender?: 'MALE' | 'FEMALE' + voiceName?: string } class GcTextToSpeechClient { @@ -14,12 +15,21 @@ class GcTextToSpeechClient { this._client = new GcTextToSpeech.TextToSpeechClient({ credentials }) } - async textToSpeech ({ text, languageCode, gender }: TextToSpeechParams): Promise { + async ssmlTextToSpeech ({ text, languageCode, ssmlGender, voiceName }: TextToSpeechParams): Promise { const ssml = `${text}` - const [response] = await this._client.synthesizeSpeech({ input: { ssml }, - voice: { languageCode, ssmlGender: gender }, + voice: { languageCode, ssmlGender, name: voiceName }, + audioConfig: { audioEncoding: 'OGG_OPUS' } + }) + + return response.audioContent + } + + async textToSpeech ({ text, languageCode, voiceName }: TextToSpeechParams): Promise { + const [response] = await this._client.synthesizeSpeech({ + input: { text }, + voice: { languageCode, name: voiceName }, audioConfig: { audioEncoding: 'OGG_OPUS' } }) diff --git a/src/modules/text-to-speech/commandConfigList.ts b/src/modules/text-to-speech/commandConfigList.ts new file mode 100644 index 00000000..1061382e --- /dev/null +++ b/src/modules/text-to-speech/commandConfigList.ts @@ -0,0 +1,70 @@ +import type { TextToSpeechParams } from '../../google-cloud/gcTextToSpeechClient' + +interface CommandConfigItem { + command: string + gcParams: Omit +} + +export const commandConfigList: CommandConfigItem[] = [ + { + command: 'venm', + gcParams: { + languageCode: 'en-US', + voiceName: 'en-US-Neural2-A' + } + }, + { + command: 'venf', + gcParams: { + languageCode: 'en-US', + voiceName: 'en-US-Neural2-C' + } + }, + { + command: 'vhkm', + gcParams: { + languageCode: 'yue-Hant-HK', + voiceName: 'yue-HK-Standard-B' + } + }, + { + command: 'vhkf', + gcParams: { + languageCode: 'en-US', + voiceName: 'yue-HK-Standard-A' + } + }, + { + command: 'vdem', + gcParams: { + languageCode: 'de-DE', + voiceName: 'de-DE-Neural2-B' + } + }, + { + command: 'vdef', + gcParams: { + languageCode: 'de-DE', + voiceName: 'de-DE-Neural2-C' + } + }, + { + command: 'vesm', + gcParams: { + languageCode: 'es-ES', + voiceName: 'es-ES-Neural2-B' + } + }, + { + command: 'vesf', + gcParams: { + languageCode: 'es-ES', + voiceName: 'es-ES-Neural2-A' + } + } +] + +export function getCommandList (): string[] { return commandConfigList.map(item => item.command) } +export function getConfigByCommand (command: string): CommandConfigItem | undefined { + return commandConfigList.find((item) => item.command === command) +} diff --git a/src/modules/text-to-speech/index.ts b/src/modules/text-to-speech/index.ts index 41ad5616..de702e90 100644 --- a/src/modules/text-to-speech/index.ts +++ b/src/modules/text-to-speech/index.ts @@ -4,6 +4,7 @@ import type { Logger } from 'pino' import type { BotPayments } from '../payment' import type { OnMessageContext, PayableBot } from '../types' import { gcTextToSpeedClient, type TextToSpeechParams } from '../../google-cloud/gcTextToSpeechClient' +import { getCommandList, getConfigByCommand } from './commandConfigList' enum SupportedCommands { VOICE = 'voice', @@ -32,7 +33,7 @@ export class TextToSpeechBot implements PayableBot { } public isSupportedEvent (ctx: OnMessageContext): boolean { - return ctx.hasCommand(Object.values(SupportedCommands)) + return ctx.hasCommand(Object.values(SupportedCommands)) || ctx.hasCommand(getCommandList()) } public getEstimatedPrice (ctx: OnMessageContext): number { @@ -51,45 +52,71 @@ export class TextToSpeechBot implements PayableBot { public async onEvent (ctx: OnMessageContext): Promise { if (ctx.hasCommand(SupportedCommands.VOICE)) { const text = this.getTextFromMessage(ctx) - await this.onTextToSpeech(ctx, { text, gender: 'MALE', languageCode: 'en-US' }) + await this.onTextToSpeech(ctx, { text, ssmlGender: 'MALE', languageCode: 'en-US' }) return } if (ctx.hasCommand(SupportedCommands.VOICEHK)) { const text = this.getTextFromMessage(ctx) - await this.onTextToSpeech(ctx, { text, gender: 'MALE', languageCode: 'yue-Hant-HK' }) + await this.onTextToSpeech(ctx, { text, ssmlGender: 'MALE', languageCode: 'yue-Hant-HK' }) return } if (ctx.hasCommand(SupportedCommands.VOICEHKF)) { const text = this.getTextFromMessage(ctx) - await this.onTextToSpeech(ctx, { text, gender: 'FEMALE', languageCode: 'yue-Hant-HK' }) + await this.onTextToSpeech(ctx, { text, ssmlGender: 'FEMALE', languageCode: 'yue-Hant-HK' }) return } if (ctx.hasCommand(SupportedCommands.VOICERU)) { const text = this.getTextFromMessage(ctx) - await this.onTextToSpeech(ctx, { text, gender: 'FEMALE', languageCode: 'ru-RU' }) + await this.onTextToSpeech(ctx, { text, ssmlGender: 'FEMALE', languageCode: 'ru-RU' }) + return } if (ctx.hasCommand(SupportedCommands.VOICERU)) { const text = this.getTextFromMessage(ctx) - await this.onTextToSpeech(ctx, { text, gender: 'FEMALE', languageCode: 'ru-RU' }) + await this.onTextToSpeech(ctx, { text, ssmlGender: 'FEMALE', languageCode: 'ru-RU' }) } if (ctx.hasCommand(SupportedCommands.VOICECN)) { const text = this.getTextFromMessage(ctx) - await this.onTextToSpeech(ctx, { text, gender: 'MALE', languageCode: 'cmn-CN' }) + await this.onTextToSpeech(ctx, { text, ssmlGender: 'MALE', languageCode: 'cmn-CN' }) } if (ctx.hasCommand(SupportedCommands.VOICEES)) { const text = this.getTextFromMessage(ctx) - await this.onTextToSpeech(ctx, { text, gender: 'MALE', languageCode: 'es-ES' }) + await this.onTextToSpeech(ctx, { text, ssmlGender: 'MALE', languageCode: 'es-ES' }) + } + + if (ctx.hasCommand(getCommandList())) { + const rawCommand = ctx.entities().find(item => item.type === 'bot_command' && item.offset === 0) + if (!rawCommand) { + await ctx.reply('Unexpected error') + return + } + + const command = rawCommand.text.replace('/', '') ?? null + + if (!command) { + await ctx.reply('I cannot extract the command from the text') + return + } + + const config = getConfigByCommand(command) + + if (!config) { + await ctx.reply('There is no configuration available for this command') + return + } + + const text = this.getTextFromMessage(ctx) + await this.onTextToSpeech(ctx, { text, ...config.gcParams }) } } public async onTextToSpeech (ctx: OnMessageContext, params: TextToSpeechParams): Promise { - const { text, gender, languageCode } = params + const { text, ssmlGender, languageCode, voiceName } = params if (!params.text) { await ctx.reply('/voice command should contain text.') @@ -102,7 +129,12 @@ export class TextToSpeechBot implements PayableBot { const progressMessage = await ctx.reply('Generating...') - const voiceResult = await gcTextToSpeedClient.textToSpeech({ text, gender, languageCode }) + let voiceResult + if (ssmlGender) { // to support genders for old commands + voiceResult = await gcTextToSpeedClient.ssmlTextToSpeech({ text, ssmlGender, languageCode, voiceName }) + } else { + voiceResult = await gcTextToSpeedClient.textToSpeech({ text, ssmlGender, languageCode, voiceName }) + } if (!voiceResult) { await ctx.api.editMessageText(ctx.chat.id, progressMessage.message_id, 'An error occurred during the process of generating the message.') diff --git a/src/modules/voice-translate/index.ts b/src/modules/voice-translate/index.ts index 44ba8833..d85e0cd1 100644 --- a/src/modules/voice-translate/index.ts +++ b/src/modules/voice-translate/index.ts @@ -71,7 +71,7 @@ export class VoiceTranslateBot implements PayableBot { const translateResult = await translator.translateText(resultText, null, 'en-US') - const voiceResult = await gcTextToSpeedClient.textToSpeech({ text: translateResult.text, gender: 'MALE', languageCode: 'en-US' }) + const voiceResult = await gcTextToSpeedClient.ssmlTextToSpeech({ text: translateResult.text, ssmlGender: 'MALE', languageCode: 'en-US' }) if (!voiceResult) { await ctx.reply('voice generation error') From c8c84b1d2f1eb55487037d32b0b6a98cb60daaa4 Mon Sep 17 00:00:00 2001 From: Theo Fandrich Date: Tue, 3 Oct 2023 09:38:43 -0700 Subject: [PATCH 2/2] added languages --- src/google-cloud/gcTextToSpeechClient.ts | 2 +- .../text-to-speech/commandConfigList.ts | 98 ++++++++++++++++++- src/modules/text-to-speech/index.ts | 64 ++++++------ 3 files changed, 129 insertions(+), 35 deletions(-) diff --git a/src/google-cloud/gcTextToSpeechClient.ts b/src/google-cloud/gcTextToSpeechClient.ts index 93819fb2..29014afe 100644 --- a/src/google-cloud/gcTextToSpeechClient.ts +++ b/src/google-cloud/gcTextToSpeechClient.ts @@ -4,7 +4,7 @@ import type { CredentialBody } from 'google-auth-library/build/src/auth/credenti export interface TextToSpeechParams { text: string - languageCode: 'en-US' | 'yue-Hant-HK' | 'ru-RU' | 'cmn-CN' | 'de-DE' | 'es-ES' + languageCode: string ssmlGender?: 'MALE' | 'FEMALE' voiceName?: string } diff --git a/src/modules/text-to-speech/commandConfigList.ts b/src/modules/text-to-speech/commandConfigList.ts index 1061382e..c694a6b1 100644 --- a/src/modules/text-to-speech/commandConfigList.ts +++ b/src/modules/text-to-speech/commandConfigList.ts @@ -6,20 +6,37 @@ interface CommandConfigItem { } export const commandConfigList: CommandConfigItem[] = [ + // English { command: 'venm', gcParams: { languageCode: 'en-US', - voiceName: 'en-US-Neural2-A' + voiceName: 'en-US-Neural2-I' } }, { command: 'venf', gcParams: { languageCode: 'en-US', - voiceName: 'en-US-Neural2-C' + voiceName: 'en-US-Neural2-F' } }, + // Mandarin (Chinese) + { + command: 'vcnm', + gcParams: { + languageCode: 'cmn-CN', + voiceName: 'cmn-CN-Wavenet-B' + } + }, + { + command: 'vcnf', + gcParams: { + languageCode: 'cmn-CN', + voiceName: 'cmn-CN-Wavenet-A' + } + }, + // Cantonese (Chinese) { command: 'vhkm', gcParams: { @@ -34,6 +51,7 @@ export const commandConfigList: CommandConfigItem[] = [ voiceName: 'yue-HK-Standard-A' } }, + // German { command: 'vdem', gcParams: { @@ -48,6 +66,7 @@ export const commandConfigList: CommandConfigItem[] = [ voiceName: 'de-DE-Neural2-C' } }, + // Spanish { command: 'vesm', gcParams: { @@ -61,6 +80,81 @@ export const commandConfigList: CommandConfigItem[] = [ languageCode: 'es-ES', voiceName: 'es-ES-Neural2-A' } + }, + // Russian + { + command: 'vrum', + gcParams: { + languageCode: 'ru-RU', + voiceName: 'ru-RU-Wavenet-B' + } + }, + { + command: 'vruf', + gcParams: { + languageCode: 'ru-RU', + voiceName: 'ru-RU-Wavenet-A' + } + }, + // Indonesian + { + command: 'vidm', + gcParams: { + languageCode: 'id-ID', + voiceName: 'id-ID-Wavenet-B' + } + }, + { + command: 'vidf', + gcParams: { + languageCode: 'id-ID', + voiceName: 'id-ID-Wavenet-A' + } + }, + // Korean + { + command: 'vkom', + gcParams: { + languageCode: 'ko-KR', + voiceName: 'ko-KR-Wavenet-C' + } + }, + { + command: 'vkof', + gcParams: { + languageCode: 'ko-KR', + voiceName: 'ko-KR-Wavenet-A' + } + }, + // Japanese + { + command: 'vjam', + gcParams: { + languageCode: 'ja-JP', + voiceName: 'ja-JP-Wavenet-C' + } + }, + { + command: 'vjaf', + gcParams: { + languageCode: 'ja-JP', + voiceName: 'ja-JP-Wavenet-B' + } + }, + // Portuguese + { + command: 'vptm', + gcParams: { + languageCode: 'pt-PT', + voiceName: 'pt-PT-Wavenet-C' + } + }, + { + command: 'vptf', + gcParams: { + languageCode: 'pt-PT', + voiceName: 'pt-PT-Wavenet-A' + } } ] diff --git a/src/modules/text-to-speech/index.ts b/src/modules/text-to-speech/index.ts index de702e90..cb35b2f0 100644 --- a/src/modules/text-to-speech/index.ts +++ b/src/modules/text-to-speech/index.ts @@ -56,38 +56,38 @@ export class TextToSpeechBot implements PayableBot { return } - if (ctx.hasCommand(SupportedCommands.VOICEHK)) { - const text = this.getTextFromMessage(ctx) - await this.onTextToSpeech(ctx, { text, ssmlGender: 'MALE', languageCode: 'yue-Hant-HK' }) - return - } - - if (ctx.hasCommand(SupportedCommands.VOICEHKF)) { - const text = this.getTextFromMessage(ctx) - await this.onTextToSpeech(ctx, { text, ssmlGender: 'FEMALE', languageCode: 'yue-Hant-HK' }) - return - } - - if (ctx.hasCommand(SupportedCommands.VOICERU)) { - const text = this.getTextFromMessage(ctx) - await this.onTextToSpeech(ctx, { text, ssmlGender: 'FEMALE', languageCode: 'ru-RU' }) - return - } - - if (ctx.hasCommand(SupportedCommands.VOICERU)) { - const text = this.getTextFromMessage(ctx) - await this.onTextToSpeech(ctx, { text, ssmlGender: 'FEMALE', languageCode: 'ru-RU' }) - } - - if (ctx.hasCommand(SupportedCommands.VOICECN)) { - const text = this.getTextFromMessage(ctx) - await this.onTextToSpeech(ctx, { text, ssmlGender: 'MALE', languageCode: 'cmn-CN' }) - } - - if (ctx.hasCommand(SupportedCommands.VOICEES)) { - const text = this.getTextFromMessage(ctx) - await this.onTextToSpeech(ctx, { text, ssmlGender: 'MALE', languageCode: 'es-ES' }) - } + // if (ctx.hasCommand(SupportedCommands.VOICEHK)) { + // const text = this.getTextFromMessage(ctx) + // await this.onTextToSpeech(ctx, { text, ssmlGender: 'MALE', languageCode: 'yue-Hant-HK' }) + // return + // } + + // if (ctx.hasCommand(SupportedCommands.VOICEHKF)) { + // const text = this.getTextFromMessage(ctx) + // await this.onTextToSpeech(ctx, { text, ssmlGender: 'FEMALE', languageCode: 'yue-Hant-HK' }) + // return + // } + + // if (ctx.hasCommand(SupportedCommands.VOICERU)) { + // const text = this.getTextFromMessage(ctx) + // await this.onTextToSpeech(ctx, { text, ssmlGender: 'FEMALE', languageCode: 'ru-RU' }) + // return + // } + + // if (ctx.hasCommand(SupportedCommands.VOICERU)) { + // const text = this.getTextFromMessage(ctx) + // await this.onTextToSpeech(ctx, { text, ssmlGender: 'FEMALE', languageCode: 'ru-RU' }) + // } + + // if (ctx.hasCommand(SupportedCommands.VOICECN)) { + // const text = this.getTextFromMessage(ctx) + // await this.onTextToSpeech(ctx, { text, ssmlGender: 'MALE', languageCode: 'cmn-CN' }) + // } + + // if (ctx.hasCommand(SupportedCommands.VOICEES)) { + // const text = this.getTextFromMessage(ctx) + // await this.onTextToSpeech(ctx, { text, ssmlGender: 'MALE', languageCode: 'es-ES' }) + // } if (ctx.hasCommand(getCommandList())) { const rawCommand = ctx.entities().find(item => item.type === 'bot_command' && item.offset === 0)