From 5cc4d05c3db8a37b5f6250bda13dea1fb445f234 Mon Sep 17 00:00:00 2001 From: ahiipsa Date: Wed, 27 Sep 2023 17:30:51 +0400 Subject: [PATCH] Added voice translate and text to speech --- src/bot.ts | 5 +- src/config.ts | 1 + src/google-cloud/gcTextToSpeechClient.ts | 26 ++++++++++ src/modules/text-to-speech/index.ts | 65 ++++++++++++++++++++++++ src/modules/voice-translate/client.ts | 17 ------- src/modules/voice-translate/index.ts | 53 ++++++------------- 6 files changed, 112 insertions(+), 55 deletions(-) create mode 100644 src/google-cloud/gcTextToSpeechClient.ts create mode 100644 src/modules/text-to-speech/index.ts delete mode 100644 src/modules/voice-translate/client.ts diff --git a/src/bot.ts b/src/bot.ts index 2f9a9272..b225d87f 100644 --- a/src/bot.ts +++ b/src/bot.ts @@ -51,6 +51,7 @@ import { ProfilingIntegration } from '@sentry/profiling-node' import { ES } from './es' import { hydrateFiles } from '@grammyjs/files' import { VoiceTranslateBot } from './modules/voice-translate' +import { TextToSpeechBot } from './modules/text-to-speech' Events.EventEmitter.defaultMaxListeners = 30 @@ -216,6 +217,7 @@ const llmsBot = new LlmsBot(payments) const documentBot = new DocumentHandler() const telegramPayments = new TelegramPayments(payments) const voiceTranslateBot = new VoiceTranslateBot(payments) +const textToSpeechBot = new TextToSpeechBot(payments) bot.on('message:new_chat_members:me', async (ctx) => { try { @@ -329,6 +331,7 @@ const PayableBots: Record = { voiceMemo: { bot: voiceMemo }, documentBot: { bot: documentBot }, translateBot: { bot: translateBot }, + textToSpeech: { bot: textToSpeechBot }, openAiBot: { enabled: (ctx: OnMessageContext) => ctx.session.openAi.imageGen.isEnabled, bot: openAiBot @@ -664,6 +667,6 @@ async function bootstrap (): Promise { } bootstrap().catch((error) => { - console.error(`bot bootstrap error ${error}`) + logger.error(`bot bootstrap error ${error}`) process.exit(1) }) diff --git a/src/config.ts b/src/config.ts index ae760a61..83a43614 100644 --- a/src/config.ts +++ b/src/config.ts @@ -131,6 +131,7 @@ export default { 'https://api.thegraph.com/subgraphs/name/nick8319/uniswap-v3-harmony' }, walletConnect: { projectId: process.env.WALLET_CONNECT_PROJECT_ID ?? '' }, + voiceTranslate: { isEnabled: Boolean(parseInt(process.env.BOT_VOICE_TRANSLATE_ENABLE ?? '0')) }, db: { url: process.env.DATABASE_URL ?? '' }, credits: { maxChats: 3, diff --git a/src/google-cloud/gcTextToSpeechClient.ts b/src/google-cloud/gcTextToSpeechClient.ts new file mode 100644 index 00000000..f765e677 --- /dev/null +++ b/src/google-cloud/gcTextToSpeechClient.ts @@ -0,0 +1,26 @@ +import GcTextToSpeech, { type TextToSpeechClient } from '@google-cloud/text-to-speech' +import config from '../config' +import type { CredentialBody } from 'google-auth-library/build/src/auth/credentials' + +class GcTextToSpeechClient { + private readonly _client: TextToSpeechClient + constructor (credentials: CredentialBody) { + this._client = new GcTextToSpeech.TextToSpeechClient({ credentials }) + } + + async textToSpeech (text: string): Promise { + const ssml = `${text}` + + const [response] = await this._client.synthesizeSpeech({ + input: { ssml }, + voice: { languageCode: 'en-US', ssmlGender: 'MALE' }, + audioConfig: { audioEncoding: 'OGG_OPUS' } + }) + + return response.audioContent + } +} + +const credentials = JSON.parse(Buffer.from(config.gc.credentials, 'base64').toString('utf-8')) + +export const gcTextToSpeedClient = new GcTextToSpeechClient(credentials) diff --git a/src/modules/text-to-speech/index.ts b/src/modules/text-to-speech/index.ts new file mode 100644 index 00000000..111a633a --- /dev/null +++ b/src/modules/text-to-speech/index.ts @@ -0,0 +1,65 @@ +import pino from 'pino' +import { InputFile } from 'grammy' +import type { Logger } from 'pino' +import type { BotPayments } from '../payment' +import type { OnMessageContext, PayableBot } from '../types' +import { gcTextToSpeedClient } from '../../google-cloud/gcTextToSpeechClient' + +export class TextToSpeechBot implements PayableBot { + private readonly payments: BotPayments + + private readonly logger: Logger + + constructor (payments: BotPayments) { + this.payments = payments + this.logger = pino({ + name: 'TextToSpeech', + transport: { + target: 'pino-pretty', + options: { colorize: true } + } + }) + } + + public isSupportedEvent (ctx: OnMessageContext): boolean { + return ctx.hasCommand('voice') + } + + public getEstimatedPrice (ctx: OnMessageContext): number { + const str = ctx.match?.toString() ?? '' + + return str.length * 0.0001 + } + + public async onTextToSpeech (ctx: OnMessageContext, message: string): Promise { + if (!message) { + await ctx.reply('/voice command should contain text.') + return + } + + if (!ctx.chat?.id) { + throw new Error('Internal error') + } + + const progressMessage = await ctx.reply('Waite a moment...') + + const voiceResult = await gcTextToSpeedClient.textToSpeech(message) + + if (!voiceResult) { + await ctx.api.editMessageText(ctx.chat.id, progressMessage.message_id, 'An error occurred during the process of generating the message.') + return + } + + const inputFile = new InputFile(voiceResult) + + await ctx.api.deleteMessage(ctx.chat.id, progressMessage.message_id) + await ctx.replyWithVoice(inputFile) + } + + public async onEvent (ctx: OnMessageContext): Promise { + if (ctx.hasCommand('voice')) { + const text = ctx.match.toString() + await this.onTextToSpeech(ctx, text) + } + } +} diff --git a/src/modules/voice-translate/client.ts b/src/modules/voice-translate/client.ts deleted file mode 100644 index 3ce130c2..00000000 --- a/src/modules/voice-translate/client.ts +++ /dev/null @@ -1,17 +0,0 @@ -import GcTextToSpeech from '@google-cloud/text-to-speech' -import config from '../../config' - -const credentials = JSON.parse(Buffer.from(config.gc.credentials, 'base64').toString('utf-8')) -const client = new GcTextToSpeech.TextToSpeechClient({ credentials }) - -export async function textToSpeech (text: string): Promise { - const ssml = `${text}` - - const [response] = await client.synthesizeSpeech({ - input: { ssml }, - voice: { languageCode: 'en-US', ssmlGender: 'MALE' }, - audioConfig: { audioEncoding: 'MP3' } - }) - - return response.audioContent -} diff --git a/src/modules/voice-translate/index.ts b/src/modules/voice-translate/index.ts index 74923791..870916c0 100644 --- a/src/modules/voice-translate/index.ts +++ b/src/modules/voice-translate/index.ts @@ -2,10 +2,12 @@ import fs from 'fs' import pino from 'pino' import { InputFile } from 'grammy' import type { Logger } from 'pino' -import { textToSpeech } from './client' +import { gcTextToSpeedClient } from '../../google-cloud/gcTextToSpeechClient' import type { BotPayments } from '../payment' import { speechToText } from '../open-ai/api/openAi' import type { OnMessageContext, PayableBot } from '../types' +import config from '../../config' +import { translator } from '../translate/deeplClient' export class VoiceTranslateBot implements PayableBot { private readonly payments: BotPayments @@ -26,52 +28,27 @@ export class VoiceTranslateBot implements PayableBot { public isSupportedEvent (ctx: OnMessageContext): boolean { const { voice, audio } = ctx.update.message - return (!!voice || !!audio) || ctx.hasCommand('voice') - } - - public getEstimatedPrice (ctx: OnMessageContext): number { - return 0 - } - - public async onTextToSpeech (ctx: OnMessageContext, message: string): Promise { - if (!message) { - await ctx.reply('/voice command should contain text.') - return - } - - if (!ctx.chat?.id) { - throw new Error('Internal error') - } - - const progressMessage = await ctx.reply('Waite a moment...') - - const voiceResult = await textToSpeech(message) - - if (!voiceResult) { - await ctx.api.editMessageText(ctx.chat.id, progressMessage.message_id, 'An error occurred during the process of generating the message.') - return + if (!config.voiceTranslate.isEnabled) { + return false } - const inputFile = new InputFile(voiceResult) + return (!!voice || !!audio) + } - await ctx.api.deleteMessage(ctx.chat.id, progressMessage.message_id) - await ctx.replyWithVoice(inputFile) + public getEstimatedPrice (ctx: OnMessageContext): number { + const { voice, audio } = ctx.update.message + const seconds = (voice?.duration ?? audio?.duration) ?? 0 + return seconds * 0.005 } public async onEvent (ctx: OnMessageContext): Promise { const { voice, audio } = ctx.update.message - if (ctx.hasCommand('voice')) { - const text = ctx.match.toString() - await this.onTextToSpeech(ctx, text) - return - } - if (!(!!voice || !!audio)) { return } - const message = await ctx.reply('Waite a moment...') + const progressMessage = await ctx.reply('Waite a moment...') if (!ctx.chat?.id) { throw Error('chat id is undefined') @@ -92,14 +69,16 @@ export class VoiceTranslateBot implements PayableBot { const resultText = await speechToText(fs.createReadStream(filename)) fs.rmSync(filename) - const voiceResult = await textToSpeech(resultText) + const translateResult = await translator.translateText(resultText, null, 'en-US') + + const voiceResult = await gcTextToSpeedClient.textToSpeech(translateResult.text) if (!voiceResult) { await ctx.reply('voice generation error') return } - await ctx.api.editMessageText(ctx.chat.id, message.message_id, resultText) + await ctx.api.deleteMessage(ctx.chat.id, progressMessage.message_id) const inputFile = new InputFile(voiceResult)