Added raw demo

Fixed ADC auth
harmony-one · Sep 26, 2023 · 9216a8c · 9216a8c
1 parent 703a24c
commit 9216a8c
Show file tree

Hide file tree

Showing 8 changed files with 516 additions and 17 deletions.
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -74,9 +74,11 @@
   },
   "dependencies": {
     "@elastic/elasticsearch": "^8.9.0",
+    "@google-cloud/text-to-speech": "^5.0.1",
     "@grammyjs/auto-chat-action": "^0.1.1",
     "@grammyjs/auto-retry": "^1.1.1",
     "@grammyjs/conversations": "^1.1.2",
+    "@grammyjs/files": "^1.0.4",
     "@grammyjs/menu": "^1.2.1",
     "@grammyjs/ratelimiter": "^1.2.0",
     "@grammyjs/runner": "^2.0.3",
@@ -93,7 +95,7 @@
     "express-async-handler": "^1.2.0",
     "form-data": "^4.0.0",
     "gpt-tokenizer": "^2.1.1",
-    "grammy": "^1.17.1",
+    "grammy": "^1.18.3",
     "jsqr": "^1.4.0",
     "litllm": "^3.0.0",
     "lokijs": "^1.5.12",

diff --git a/src/bot.ts b/src/bot.ts
@@ -49,6 +49,8 @@ import * as Sentry from '@sentry/node'
 import * as Events from 'events'
 import { ProfilingIntegration } from '@sentry/profiling-node'
 import { ES } from './es'
+import { hydrateFiles } from '@grammyjs/files'
+import { VoiceTranslateBot } from './modules/voice-translate'
 
 Events.EventEmitter.defaultMaxListeners = 30
 
@@ -61,6 +63,7 @@ const logger = pino({
 })
 
 export const bot = new Bot<BotContext>(config.telegramBotAuthToken)
+bot.api.config.use(hydrateFiles(bot.token))
 bot.api.config.use(autoRetry())
 
 bot.use(
@@ -212,6 +215,7 @@ const translateBot = new TranslateBot()
 const llmsBot = new LlmsBot(payments)
 const documentBot = new DocumentHandler()
 const telegramPayments = new TelegramPayments(payments)
+const voiceTranslateBot = new VoiceTranslateBot(payments)
 
 bot.on('message:new_chat_members:me', async (ctx) => {
   try {
@@ -321,6 +325,7 @@ const writeCommandLog = async (
 const PayableBots: Record<string, PayableBotConfig> = {
   qrCodeBot: { bot: qrCodeBot },
   sdImagesBot: { bot: sdImagesBot },
+  voiceTranslate: { bot: voiceTranslateBot },
   voiceMemo: { bot: voiceMemo },
   documentBot: { bot: documentBot },
   translateBot: { bot: translateBot },

diff --git a/src/config.ts b/src/config.ts
@@ -148,5 +148,5 @@ export default {
     password: process.env.ES_PASSWORD ?? '',
     index: process.env.ES_INDEX
   },
-  deepL: { apikey: process.env.DEEPL_API_KEY ?? '' }
+  gc: { credentials: process.env.GC_CREDENTIALS ?? '' }
 }
diff --git a/src/modules/open-ai/api/openAi.ts b/src/modules/open-ai/api/openAi.ts
@@ -17,6 +17,7 @@ import {
   type DalleGPTModel,
   DalleGPTModels
 } from '../types'
+import type fs from 'fs'
 
 const openai = new OpenAI({ apiKey: config.openAiKey })
 
@@ -245,3 +246,12 @@ export function getGrammy429Error (): GrammyError {
     { parameters: { retry_after: 33 } }
   )
 }
+
+/**
+ * Transcribes an audio stream with the OpenAI `whisper-1` model.
+ *
+ * @param readStream - File read stream of the audio to transcribe.
+ * @returns The `text` field of the transcription response.
+ */
+export async function speechToText (readStream: fs.ReadStream): Promise<string> {
+  const { text } = await openai.audio.transcriptions.create({
+    model: 'whisper-1',
+    file: readStream
+  })
+
+  return text
+}
diff --git a/src/modules/types.ts b/src/modules/types.ts
@@ -11,6 +11,7 @@ import {
 import { type AutoChatActionFlavor } from '@grammyjs/auto-chat-action'
 import { type ParseMode } from 'grammy/types'
 import { type InlineKeyboardMarkup } from 'grammy/out/types'
+import type { FileFlavor } from '@grammyjs/files'
 
 export interface ImageGenSessionData {
   numImages: number
@@ -99,10 +100,10 @@ export interface BotSessionData {
   analytics: Analytics
 }
 
-export type BotContext = Context &
+export type BotContext = FileFlavor<Context &
 SessionFlavor<BotSessionData> &
 ConversationFlavor &
-AutoChatActionFlavor
+AutoChatActionFlavor>
 
 export type CustomContext<Q extends FilterQuery> = Filter<BotContext, Q>
 export type OnMessageContext = CustomContext<'message'>

diff --git a/src/modules/voice-translate/client.ts b/src/modules/voice-translate/client.ts
@@ -0,0 +1,17 @@
+import GcTextToSpeech from '@google-cloud/text-to-speech'
+import config from '../../config'
+
+// config.gc.credentials is a base64-encoded JSON key and defaults to '' when
+// GC_CREDENTIALS is unset. JSON.parse('') throws, which would crash every
+// module importing this file, so only decode when a value is present.
+const credentials = config.gc.credentials !== ''
+  ? JSON.parse(Buffer.from(config.gc.credentials, 'base64').toString('utf-8'))
+  : undefined
+// Without explicit credentials the client is constructed with no auth options.
+// NOTE(review): the Google client libraries are documented to fall back to
+// Application Default Credentials in that case — confirm for this deployment.
+const client = new GcTextToSpeech.TextToSpeechClient(
+  credentials !== undefined ? { credentials } : {}
+)
+
+/**
+ * Synthesizes speech for `text` with Google Cloud Text-to-Speech.
+ *
+ * The text is treated as plain text: XML-reserved characters are escaped
+ * before it is wrapped in a `<speak>` element, so input such as "Q&A" or
+ * "a < b" cannot produce malformed SSML.
+ *
+ * @param text - Plain text to read aloud.
+ * @returns The `audioContent` of the synthesis response (MP3, en-US male
+ * voice as requested below); may be null or undefined if the response
+ * carries no audio.
+ */
+export async function textToSpeech (text: string): Promise<string | Uint8Array | null | undefined> {
+  // '&' must be replaced first so the entities added below are not re-escaped.
+  const escapedText = text
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&apos;')
+  const ssml = `<speak>${escapedText}</speak>`
+
+  const [response] = await client.synthesizeSpeech({
+    input: { ssml },
+    voice: { languageCode: 'en-US', ssmlGender: 'MALE' },
+    audioConfig: { audioEncoding: 'MP3' }
+  })
+
+  return response.audioContent
+}
diff --git a/src/modules/voice-translate/index.ts b/src/modules/voice-translate/index.ts
@@ -0,0 +1,77 @@
+import fs from 'fs'
+import pino from 'pino'
+import { InputFile } from 'grammy'
+import type { Logger } from 'pino'
+import { textToSpeech } from './client'
+import type { BotPayments } from '../payment'
+import { speechToText } from '../open-ai/api/openAi'
+import type { OnMessageContext, PayableBot } from '../types'
+
+/**
+ * Bot module that reacts to voice and audio messages: it downloads the
+ * attachment, transcribes it with `speechToText`, synthesizes the transcript
+ * with `textToSpeech`, then posts the transcript and the generated voice
+ * message back to the chat.
+ */
+export class VoiceTranslateBot implements PayableBot {
+  private readonly payments: BotPayments
+
+  private readonly logger: Logger
+
+  constructor (payments: BotPayments) {
+    this.payments = payments
+    this.logger = pino({
+      name: 'VoiceTranslate',
+      transport: {
+        target: 'pino-pretty',
+        options: { colorize: true }
+      }
+    })
+  }
+
+  /** Returns true when the incoming message carries a voice or audio attachment. */
+  public isSupportedEvent (ctx: OnMessageContext): boolean {
+    const { voice, audio } = ctx.update.message
+
+    return (!!voice || !!audio)
+  }
+
+  /** This module currently reports a price of 0 for every message. */
+  public getEstimatedPrice (ctx: OnMessageContext): number {
+    return 0
+  }
+
+  /**
+   * Handles a voice/audio message end to end.
+   *
+   * Sends a placeholder reply, transcribes the attachment, replaces the
+   * placeholder with the transcript and replies with the synthesized voice.
+   * Messages without voice or audio are ignored.
+   *
+   * @throws Error when the update has no chat id; errors from the download,
+   * transcription, synthesis and Telegram API calls propagate to the caller.
+   */
+  public async onEvent (ctx: OnMessageContext): Promise<void> {
+    if (!this.isSupportedEvent(ctx)) {
+      return
+    }
+
+    // Validate before sending anything so a malformed update fails up front.
+    if (!ctx.chat?.id) {
+      throw Error('chat id is undefined')
+    }
+    const chatId = ctx.chat.id
+
+    const message = await ctx.reply('Wait a moment...')
+
+    const file = await ctx.getFile()
+    const path = await file.download()
+
+    // Tracks whichever path currently holds the download so the finally block
+    // removes it even when the rename or the transcription throws.
+    let filename = path
+    let resultText: string
+
+    try {
+      let ext = 'ogg'
+
+      if (file.file_path) {
+        ext = file.file_path.split('.').pop() ?? ext
+      }
+
+      // The download is renamed to carry the file extension; presumably the
+      // transcription API uses the name to detect the audio format — TODO confirm.
+      const renamed = path + '.' + ext
+      fs.renameSync(path, renamed)
+      filename = renamed
+
+      resultText = await speechToText(fs.createReadStream(filename))
+    } finally {
+      fs.rmSync(filename, { force: true })
+    }
+
+    const voiceResult = await textToSpeech(resultText)
+
+    if (!voiceResult) {
+      await ctx.reply('voice generation error')
+      return
+    }
+
+    await ctx.api.editMessageText(chatId, message.message_id, resultText)
+
+    // InputFile interprets a string argument as a local file path, so string
+    // audio content must be turned into bytes first.
+    // NOTE(review): assumes string audioContent is base64-encoded — confirm.
+    const audioBytes = typeof voiceResult === 'string'
+      ? Buffer.from(voiceResult, 'base64')
+      : voiceResult
+
+    await ctx.replyWithVoice(new InputFile(audioBytes))
+  }
+}