From 5cc4d05c3db8a37b5f6250bda13dea1fb445f234 Mon Sep 17 00:00:00 2001
From: ahiipsa <ahiipsa@gmail.com>
Date: Wed, 27 Sep 2023 17:30:51 +0400
Subject: [PATCH] Added voice translate and text to speech

---
 src/bot.ts                               |  5 +-
 src/config.ts                            |  1 +
 src/google-cloud/gcTextToSpeechClient.ts | 26 ++++++++++
 src/modules/text-to-speech/index.ts      | 65 ++++++++++++++++++++++++
 src/modules/voice-translate/client.ts    | 17 -------
 src/modules/voice-translate/index.ts     | 53 ++++++-------------
 6 files changed, 112 insertions(+), 55 deletions(-)
 create mode 100644 src/google-cloud/gcTextToSpeechClient.ts
 create mode 100644 src/modules/text-to-speech/index.ts
 delete mode 100644 src/modules/voice-translate/client.ts
diff --git a/src/bot.ts b/src/bot.ts
index 2f9a9272..b225d87f 100644
--- a/src/bot.ts
+++ b/src/bot.ts
@@ -51,6 +51,7 @@ import { ProfilingIntegration } from '@sentry/profiling-node'
 import { ES } from './es'
 import { hydrateFiles } from '@grammyjs/files'
 import { VoiceTranslateBot } from './modules/voice-translate'
+import { TextToSpeechBot } from './modules/text-to-speech'
 
 Events.EventEmitter.defaultMaxListeners = 30
 
@@ -216,6 +217,7 @@ const llmsBot = new LlmsBot(payments)
 const documentBot = new DocumentHandler()
 const telegramPayments = new TelegramPayments(payments)
 const voiceTranslateBot = new VoiceTranslateBot(payments)
+const textToSpeechBot = new TextToSpeechBot(payments)
 
 bot.on('message:new_chat_members:me', async (ctx) => {
   try {
@@ -329,6 +331,7 @@ const PayableBots: Record<string, PayableBotConfig> = {
   voiceMemo: { bot: voiceMemo },
   documentBot: { bot: documentBot },
   translateBot: { bot: translateBot },
+  textToSpeech: { bot: textToSpeechBot },
   openAiBot: {
     enabled: (ctx: OnMessageContext) => ctx.session.openAi.imageGen.isEnabled,
     bot: openAiBot
@@ -664,6 +667,6 @@ async function bootstrap (): Promise<void> {
 }
 
 bootstrap().catch((error) => {
-  console.error(`bot bootstrap error ${error}`)
+  logger.error(`bot bootstrap error ${error}`)
   process.exit(1)
 })
diff --git a/src/config.ts b/src/config.ts
index ae760a61..83a43614 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -131,6 +131,7 @@ export default {
       'https://api.thegraph.com/subgraphs/name/nick8319/uniswap-v3-harmony'
   },
   walletConnect: { projectId: process.env.WALLET_CONNECT_PROJECT_ID ?? '' },
+  voiceTranslate: { isEnabled: Boolean(parseInt(process.env.BOT_VOICE_TRANSLATE_ENABLE ?? '0')) },
   db: { url: process.env.DATABASE_URL ?? '' },
   credits: {
     maxChats: 3,
diff --git a/src/google-cloud/gcTextToSpeechClient.ts b/src/google-cloud/gcTextToSpeechClient.ts
new file mode 100644
index 00000000..f765e677
--- /dev/null
+++ b/src/google-cloud/gcTextToSpeechClient.ts
@@ -0,0 +1,26 @@
+import GcTextToSpeech, { type TextToSpeechClient } from '@google-cloud/text-to-speech'
+import config from '../config'
+import type { CredentialBody } from 'google-auth-library/build/src/auth/credentials'
+
+class GcTextToSpeechClient {
+  private readonly _client: TextToSpeechClient
+  constructor (credentials: CredentialBody) {
+    this._client = new GcTextToSpeech.TextToSpeechClient({ credentials })
+  }
+
+  /** Synthesizes `text` as en-US male OGG/Opus speech and resolves to the response's `audioContent`. */
+  async textToSpeech (text: string): Promise<string | Uint8Array | null | undefined> {
+    // Escape XML metacharacters so arbitrary text cannot break, or inject markup into, the SSML document.
+    const ssml = `<speak>${text.replace(/[<>&'"]/g, (c) => `&#${c.charCodeAt(0)};`)}</speak>`
+    const [response] = await this._client.synthesizeSpeech({
+      input: { ssml },
+      voice: { languageCode: 'en-US', ssmlGender: 'MALE' },
+      audioConfig: { audioEncoding: 'OGG_OPUS' }
+    })
+    return response.audioContent
+  }
+}
+
+// Decode the base64-encoded JSON in `config.gc.credentials` and build the shared client at module load.
+const credentials = JSON.parse(Buffer.from(config.gc.credentials, 'base64').toString('utf-8'))
+export const gcTextToSpeedClient = new GcTextToSpeechClient(credentials)
diff --git a/src/modules/text-to-speech/index.ts b/src/modules/text-to-speech/index.ts
new file mode 100644
index 00000000..111a633a
--- /dev/null
+++ b/src/modules/text-to-speech/index.ts
@@ -0,0 +1,65 @@
+import pino from 'pino'
+import { InputFile } from 'grammy'
+import type { Logger } from 'pino'
+import type { BotPayments } from '../payment'
+import type { OnMessageContext, PayableBot } from '../types'
+import { gcTextToSpeedClient } from '../../google-cloud/gcTextToSpeechClient'
+
+/** Payable bot module that answers the `/voice` command with a synthesized voice message. */
+export class TextToSpeechBot implements PayableBot {
+  private readonly payments: BotPayments
+  private readonly logger: Logger
+
+  constructor (payments: BotPayments) {
+    this.payments = payments
+    this.logger = pino({ name: 'TextToSpeech', transport: { target: 'pino-pretty', options: { colorize: true } } })
+  }
+
+  /** Only the `/voice` command is handled by this module. */
+  public isSupportedEvent (ctx: OnMessageContext): boolean {
+    return ctx.hasCommand('voice')
+  }
+
+  /** Estimated price: 0.0001 per character of `ctx.match` (0 when there is no match). */
+  public getEstimatedPrice (ctx: OnMessageContext): number {
+    return (ctx.match?.toString() ?? '').length * 0.0001
+  }
+
+  /**
+   * Synthesizes `message` and replies with it as a voice note, showing a progress message meanwhile.
+   * A synthesis failure is logged, reported in the progress message, and rethrown to the caller.
+   */
+  public async onTextToSpeech (ctx: OnMessageContext, message: string): Promise<void> {
+    if (!message) {
+      await ctx.reply('/voice command should contain text.')
+      return
+    }
+    if (!ctx.chat?.id) {
+      throw new Error('Internal error')
+    }
+    const failureText = 'An error occurred during the process of generating the message.'
+    const progressMessage = await ctx.reply('Wait a moment...')
+    let voiceResult: string | Uint8Array | null | undefined
+    try {
+      voiceResult = await gcTextToSpeedClient.textToSpeech(message)
+    } catch (e) {
+      // Do not leave the progress placeholder dangling when synthesis throws.
+      this.logger.error(e)
+      await ctx.api.editMessageText(ctx.chat.id, progressMessage.message_id, failureText)
+      throw e
+    }
+    if (!voiceResult) {
+      await ctx.api.editMessageText(ctx.chat.id, progressMessage.message_id, failureText)
+      return
+    }
+    await ctx.api.deleteMessage(ctx.chat.id, progressMessage.message_id)
+    await ctx.replyWithVoice(new InputFile(voiceResult))
+  }
+
+  /** Entry point: forwards the text matched after `/voice` to `onTextToSpeech`. */
+  public async onEvent (ctx: OnMessageContext): Promise<void> {
+    if (ctx.hasCommand('voice')) {
+      await this.onTextToSpeech(ctx, ctx.match?.toString() ?? '')
+    }
+  }
+}
diff --git a/src/modules/voice-translate/client.ts b/src/modules/voice-translate/client.ts
deleted file mode 100644
index 3ce130c2..00000000
--- a/src/modules/voice-translate/client.ts
+++ /dev/null
@@ -1,17 +0,0 @@
-import GcTextToSpeech from '@google-cloud/text-to-speech'
-import config from '../../config'
-
-const credentials = JSON.parse(Buffer.from(config.gc.credentials, 'base64').toString('utf-8'))
-const client = new GcTextToSpeech.TextToSpeechClient({ credentials })
-
-export async function textToSpeech (text: string): Promise<string | Uint8Array | null | undefined> {
-  const ssml = `<speak>${text}</speak>`
-
-  const [response] = await client.synthesizeSpeech({
-    input: { ssml },
-    voice: { languageCode: 'en-US', ssmlGender: 'MALE' },
-    audioConfig: { audioEncoding: 'MP3' }
-  })
-
-  return response.audioContent
-}
diff --git a/src/modules/voice-translate/index.ts b/src/modules/voice-translate/index.ts
index 74923791..870916c0 100644
--- a/src/modules/voice-translate/index.ts
+++ b/src/modules/voice-translate/index.ts
@@ -2,10 +2,12 @@ import fs from 'fs'
 import pino from 'pino'
 import { InputFile } from 'grammy'
 import type { Logger } from 'pino'
-import { textToSpeech } from './client'
+import { gcTextToSpeedClient } from '../../google-cloud/gcTextToSpeechClient'
 import type { BotPayments } from '../payment'
 import { speechToText } from '../open-ai/api/openAi'
 import type { OnMessageContext, PayableBot } from '../types'
+import config from '../../config'
+import { translator } from '../translate/deeplClient'
 
 export class VoiceTranslateBot implements PayableBot {
   private readonly payments: BotPayments
@@ -26,52 +28,27 @@ export class VoiceTranslateBot implements PayableBot {
   public isSupportedEvent (ctx: OnMessageContext): boolean {
     const { voice, audio } = ctx.update.message
 
-    return (!!voice || !!audio) || ctx.hasCommand('voice')
-  }
-
-  public getEstimatedPrice (ctx: OnMessageContext): number {
-    return 0
-  }
-
-  public async onTextToSpeech (ctx: OnMessageContext, message: string): Promise<void> {
-    if (!message) {
-      await ctx.reply('/voice command should contain text.')
-      return
-    }
-
-    if (!ctx.chat?.id) {
-      throw new Error('Internal error')
-    }
-
-    const progressMessage = await ctx.reply('Waite a moment...')
-
-    const voiceResult = await textToSpeech(message)
-
-    if (!voiceResult) {
-      await ctx.api.editMessageText(ctx.chat.id, progressMessage.message_id, 'An error occurred during the process of generating the message.')
-      return
+    if (!config.voiceTranslate.isEnabled) {
+      return false
     }
 
-    const inputFile = new InputFile(voiceResult)
+    return (!!voice || !!audio)
+  }
 
-    await ctx.api.deleteMessage(ctx.chat.id, progressMessage.message_id)
-    await ctx.replyWithVoice(inputFile)
+  public getEstimatedPrice (ctx: OnMessageContext): number {
+    const { message } = ctx.update
+    // Estimate is 0.005 per second of the attached voice or audio; 0 when neither reports a duration.
+    return (message.voice?.duration ?? message.audio?.duration ?? 0) * 0.005
   }
 
   public async onEvent (ctx: OnMessageContext): Promise<void> {
     const { voice, audio } = ctx.update.message
 
-    if (ctx.hasCommand('voice')) {
-      const text = ctx.match.toString()
-      await this.onTextToSpeech(ctx, text)
-      return
-    }
-
     if (!(!!voice || !!audio)) {
       return
     }
 
-    const message = await ctx.reply('Waite a moment...')
+    const progressMessage = await ctx.reply('Waite a moment...')
 
     if (!ctx.chat?.id) {
       throw Error('chat id is undefined')
@@ -92,14 +69,16 @@ export class VoiceTranslateBot implements PayableBot {
     const resultText = await speechToText(fs.createReadStream(filename))
     fs.rmSync(filename)
 
-    const voiceResult = await textToSpeech(resultText)
+    const translateResult = await translator.translateText(resultText, null, 'en-US')
+
+    const voiceResult = await gcTextToSpeedClient.textToSpeech(translateResult.text)
 
     if (!voiceResult) {
       await ctx.reply('voice generation error')
       return
     }
 
-    await ctx.api.editMessageText(ctx.chat.id, message.message_id, resultText)
+    await ctx.api.deleteMessage(ctx.chat.id, progressMessage.message_id)
 
     const inputFile = new InputFile(voiceResult)