Skip to content

Commit

Permalink
Merge pull request #351 from harmony-one/voice-command
Browse files Browse the repository at this point in the history
Voice command
  • Loading branch information
fegloff authored Jan 27, 2024
2 parents 5184555 + 949701a commit 50d9f79
Show file tree
Hide file tree
Showing 12 changed files with 282 additions and 52 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ yarn.lock
# Sentry Auth Token
.sentryclirc
.DS_Store
public/
4 changes: 3 additions & 1 deletion src/bot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ import { VoiceToTextBot } from './modules/voice-to-text'
import { now } from './utils/perf'
import { hasPrefix } from './modules/open-ai/helpers'
import { VoiceToVoiceGPTBot } from './modules/voice-to-voice-gpt'
import { VoiceCommand } from './modules/voice-command'

Events.EventEmitter.defaultMaxListeners = 30

Expand Down Expand Up @@ -251,6 +252,7 @@ const voiceTranslateBot = new VoiceTranslateBot(payments)
const textToSpeechBot = new TextToSpeechBot(payments)
const voiceToTextBot = new VoiceToTextBot(payments)
const voiceToVoiceGPTBot = new VoiceToVoiceGPTBot(payments)
const voiceCommand = new VoiceCommand(openAiBot)

bot.on('message:new_chat_members:me', async (ctx) => {
try {
Expand Down Expand Up @@ -358,6 +360,7 @@ const writeCommandLog = async (
}

const PayableBots: Record<string, PayableBotConfig> = {
voiceCommand: { bot: voiceCommand },
qrCodeBot: { bot: qrCodeBot },
sdImagesBot: { bot: sdImagesBot },
voiceTranslate: { bot: voiceTranslateBot },
Expand Down Expand Up @@ -397,7 +400,6 @@ const onMessage = async (ctx: OnMessageContext): Promise<void> => {
await telegramPayments.onEvent(ctx)
return
}

for (const config of Object.values(PayableBots)) {
const bot = config.bot

Expand Down
4 changes: 4 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ export default {
? process.env.RESTRICTED_PHRASES.split(', ')
: ['metamask', 'walletconnect']
},
voiceCommand: {
isEnabled: true,
voiceDuration: 30
},
voiceMemo: {
isEnabled: Boolean(parseInt(process.env.VOICE_MEMO_ENABLED ?? '1')),
telegramApiId: parseInt(process.env.TELEGRAM_API_ID ?? ''),
Expand Down
10 changes: 6 additions & 4 deletions src/modules/llms/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,9 @@ export class LlmsBot implements PayableBot {
prompt,
conversation
})
const price = response.price * config.openAi.chatGpt.priceAdjustment
if (
!(await this.payments.pay(ctx as OnMessageContext, response.price))
!(await this.payments.pay(ctx as OnMessageContext, price))
) {
await this.onNotBalanceMessage(ctx)
} else {
Expand Down Expand Up @@ -365,8 +366,9 @@ export class LlmsBot implements PayableBot {
prompt,
conversation
})
const price = response.price * config.openAi.chatGpt.priceAdjustment
if (
!(await this.payments.pay(ctx as OnMessageContext, response.price))
!(await this.payments.pay(ctx as OnMessageContext, price))
) {
if (ctx.chat?.id) {
await ctx.api.deleteMessage(ctx.chat?.id, msgId)
Expand Down Expand Up @@ -421,13 +423,13 @@ export class LlmsBot implements PayableBot {
const result = await llmCheckCollectionStatus(collection?.collectionName ?? '')
if (result.price > 0) {
if (
!(await this.payments.pay(ctx as OnMessageContext, result.price))
!(await this.payments.pay(ctx as OnMessageContext, result.price)) // price 0.05 x collections (chunks)
) {
await this.onNotBalanceMessage(ctx)
} else {
ctx.session.collections.activeCollections.push(collection)
if (collection.msgId) {
const oneFee = await this.payments.getPriceInONE(result.price)
const oneFee = await this.payments.getPriceInONE(result.price) // price in cents
let statusMsg
if (collection.collectionType === 'URL') {
statusMsg = `${collection.url} processed (${this.payments.toONE(oneFee, false).toFixed(2)} ONE fee)`
Expand Down
1 change: 0 additions & 1 deletion src/modules/open-ai/api/openAi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,5 @@ export async function speechToText (readStream: fs.ReadStream): Promise<string>
file: readStream,
model: 'whisper-1'
})

return result.text
}
3 changes: 2 additions & 1 deletion src/modules/open-ai/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ export enum SupportedCommands {
dalleShorter = 'i',
genImgEn = 'genImgEn',
on = 'on',
off = 'off'
off = 'off',
talk = 'talk'
}

export const MAX_TRIES = 3
Expand Down
161 changes: 123 additions & 38 deletions src/modules/open-ai/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
} from '../types'
import {
alterGeneratedImg,
chatCompletion,
getChatModel,
getDalleModel,
getDalleModelPrice,
Expand Down Expand Up @@ -47,6 +48,7 @@ import { AxiosError } from 'axios'
import { Callbacks } from '../types'
import { LlmsBot } from '../llms'
import { type PhotoSize } from 'grammy/types'
import { responseWithVoice } from '../voice-to-voice-gpt/helpers'

const priceAdjustment = config.openAi.chatGpt.priceAdjustment
export class OpenAIBot implements PayableBot {
Expand Down Expand Up @@ -153,6 +155,72 @@ export class OpenAIBot implements PayableBot {
return false
}

/**
 * Routes a voice-transcribed command to the matching OpenAI feature queue.
 * Called (per the commit, by the VoiceCommand module) after speech-to-text:
 * `command` is the recognized command word and `transcribedText` is the full
 * transcription, which still begins with the command word itself.
 * Errors are funneled into the shared onError handler rather than thrown.
 */
public async voiceCommand (ctx: OnMessageContext | OnCallBackQueryData, command: string, transcribedText: string): Promise<void> {
  try {
    // Strip the leading command word plus any trailing punctuation/whitespace
    // ("dalle, draw a cat" -> "draw a cat") to recover the bare user prompt.
    let prompt = transcribedText.slice(command.length).replace(/^[.,\s]+/, '')
    switch (command) {
      case SupportedCommands.vision: {
        // Vision needs an attached photo — from this message or the replied-to one.
        const photo = ctx.message?.photo ?? ctx.message?.reply_to_message?.photo
        if (photo) {
          ctx.session.openAi.imageGen.imgRequestQueue.push({
            prompt,
            photo,
            command
          })
          // Start draining only if a drain loop is not already running; the
          // flag is cleared once the handler resolves.
          if (!ctx.session.openAi.imageGen.isProcessingQueue) {
            ctx.session.openAi.imageGen.isProcessingQueue = true
            await this.onImgRequestHandler(ctx).then(() => {
              ctx.session.openAi.imageGen.isProcessingQueue = false
            })
          }
        }
        // NOTE(review): a voice /vision with no photo is silently ignored —
        // confirm the user should not get feedback here.
        break
      }
      case SupportedCommands.ask:
      case SupportedCommands.talk: {
        if (this.botSuspended) {
          // Suspended bots still report analytics before bailing out.
          ctx.transient.analytics.sessionState = RequestState.Error
          await sendMessage(ctx, 'The bot is suspended').catch(async (e) => { await this.onError(ctx, e) })
          ctx.transient.analytics.actualResponseTime = now()
          return
        }
        // /talk prepends a brevity instruction (voice replies should be short);
        // /ask passes the prompt through unchanged.
        const adaptedPrompt = (SupportedCommands.talk === command
          ? 'Keep it short, like a phone call'
          : '') + await preparePrompt(ctx, prompt)
        // /ask answers as text, /talk as synthesized voice (see promptRequest.outputFormat).
        ctx.session.openAi.chatGpt.requestQueue.push({
          prompt: adaptedPrompt,
          outputFormat: SupportedCommands.ask === command ? 'text' : 'voice'
        })
        if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
          ctx.session.openAi.chatGpt.isProcessingQueue = true
          await this.onChatRequestHandler(ctx).then(() => {
            ctx.session.openAi.chatGpt.isProcessingQueue = false
          })
        }
        break
      }
      case SupportedCommands.dalleImg: {
        // An empty or single-word transcription is replaced with the configured
        // default prompt rather than sending a near-empty request to DALL·E.
        if (!prompt || prompt.split(' ').length === 1) {
          prompt = config.openAi.dalle.defaultPrompt
        }
        ctx.session.openAi.imageGen.imgRequestQueue.push({
          command: 'dalle',
          prompt
        })
        if (!ctx.session.openAi.imageGen.isProcessingQueue) {
          ctx.session.openAi.imageGen.isProcessingQueue = true
          await this.onImgRequestHandler(ctx).then(() => {
            ctx.session.openAi.imageGen.isProcessingQueue = false
          })
        }
        break
      }
      // NOTE(review): no default case — an unrecognized command word is
      // silently dropped with no user feedback. Confirm intended.
    }
  } catch (e: any) {
    await this.onError(ctx, e)
  }
}

public async onEvent (ctx: OnMessageContext | OnCallBackQueryData): Promise<void> {
ctx.transient.analytics.module = this.module
if (!(this.isSupportedEvent(ctx)) && (ctx.chat?.type !== 'private') && !ctx.session.openAi.chatGpt.isFreePromptChatGroups) {
Expand Down Expand Up @@ -346,7 +414,7 @@ export class OpenAIBot implements PayableBot {
)
}

private async promptGen (data: ChatPayload, msgId?: number): Promise< { price: number, chat: ChatConversation[] }> {
private async completionGen (data: ChatPayload, msgId?: number, outputFormat = 'text'): Promise< { price: number, chat: ChatConversation[] }> {
const { conversation, ctx, model } = data
try {
if (!msgId) {
Expand All @@ -359,29 +427,42 @@ export class OpenAIBot implements PayableBot {
})
).message_id
}
const isTypingEnabled = config.openAi.chatGpt.isTypingEnabled
if (isTypingEnabled) {
ctx.chatAction = 'typing'
}
const completion = await streamChatCompletion(
conversation,
ctx,
model,
msgId,
true // telegram messages has a character limit
)
if (isTypingEnabled) {
ctx.chatAction = null
}
if (completion) {
ctx.transient.analytics.sessionState = RequestState.Success
ctx.transient.analytics.actualResponseTime = now()
const price = getPromptPrice(completion, data)
this.logger.info(
`streamChatCompletion result = tokens: ${price.totalTokens} | ${model} | price: ${price.price}¢` // price.promptTokens + price.completionTokens }
if (outputFormat === 'text') {
const isTypingEnabled = config.openAi.chatGpt.isTypingEnabled
if (isTypingEnabled) {
ctx.chatAction = 'typing'
}
const completion = await streamChatCompletion(
conversation,
ctx,
model,
msgId,
true // telegram messages has a character limit
)
if (isTypingEnabled) {
ctx.chatAction = null
}
if (completion) {
ctx.transient.analytics.sessionState = RequestState.Success
ctx.transient.analytics.actualResponseTime = now()
const price = getPromptPrice(completion, data)
this.logger.info(
`streamChatCompletion result = tokens: ${price.totalTokens} | ${model} | price: ${price.price}¢` // price.promptTokens + price.completionTokens }
)
return {
price: price.price,
chat: conversation
}
}
} else {
const response = await chatCompletion(conversation, ChatGPTModelsEnum.GPT_35_TURBO_16K)
conversation.push({
role: 'system',
content: response.completion
})
await responseWithVoice(response.completion, ctx as OnMessageContext, msgId)
return {
price: price.price,
price: response.price,
chat: conversation
}
}
Expand All @@ -408,9 +489,10 @@ export class OpenAIBot implements PayableBot {
}
const { username } = ctx.me
const prompt = ctx.message?.text?.slice(username.length + 1) ?? '' // @
ctx.session.openAi.chatGpt.requestQueue.push(
await preparePrompt(ctx, prompt)
)
ctx.session.openAi.chatGpt.requestQueue.push({
prompt: await preparePrompt(ctx, prompt),
outputFormat: 'text'
})
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
ctx.session.openAi.chatGpt.isProcessingQueue = true
await this.onChatRequestHandler(ctx).then(() => {
Expand All @@ -433,9 +515,10 @@ export class OpenAIBot implements PayableBot {
return
}
const prompt = ctx.message?.text?.slice(prefix.length) ?? ''
ctx.session.openAi.chatGpt.requestQueue.push(
await preparePrompt(ctx, prompt)
)
ctx.session.openAi.chatGpt.requestQueue.push({
prompt: await preparePrompt(ctx, prompt),
outputFormat: 'text'
})
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
ctx.session.openAi.chatGpt.isProcessingQueue = true
await this.onChatRequestHandler(ctx).then(() => {
Expand All @@ -455,9 +538,10 @@ export class OpenAIBot implements PayableBot {
ctx.transient.analytics.actualResponseTime = now()
return
}
ctx.session.openAi.chatGpt.requestQueue.push(
await preparePrompt(ctx, ctx.message?.text ?? '')
)
ctx.session.openAi.chatGpt.requestQueue.push({
prompt: await preparePrompt(ctx, ctx.message?.text ?? ''),
outputFormat: 'text'
})
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
ctx.session.openAi.chatGpt.isProcessingQueue = true
await this.onChatRequestHandler(ctx).then(() => {
Expand Down Expand Up @@ -495,9 +579,10 @@ export class OpenAIBot implements PayableBot {
if (await this.freePromptChatGroup(ctx, prompt as string)) {
return
}
ctx.session.openAi.chatGpt.requestQueue.push(
await preparePrompt(ctx, prompt as string)
)
ctx.session.openAi.chatGpt.requestQueue.push({
prompt: await preparePrompt(ctx, prompt as string),
outputFormat: 'text'
})
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
ctx.session.openAi.chatGpt.isProcessingQueue = true
await this.onChatRequestHandler(ctx).then(() => {
Expand Down Expand Up @@ -527,7 +612,7 @@ export class OpenAIBot implements PayableBot {
ctx.transient.analytics.actualResponseTime = now()
return
}
const { url, newPrompt } = hasUrl(ctx, prompt)
const { url, newPrompt } = hasUrl(ctx, prompt.prompt)
const hasCode = hasCodeSnippet(ctx)
if (chatConversation.length === 0 && (hasCode || !url)) {
chatConversation.push({
Expand All @@ -540,14 +625,14 @@ export class OpenAIBot implements PayableBot {
} else {
chatConversation.push({
role: 'user',
content: prompt
content: prompt.prompt
})
const payload = {
conversation: chatConversation,
model: model || config.openAi.chatGpt.model,
ctx
}
const result = await this.promptGen(payload)
const result = await this.completionGen(payload, prompt.msgId, prompt.outputFormat)
ctx.session.openAi.chatGpt.chatConversation = [...result.chat]
if (
!(await this.payments.pay(ctx as OnMessageContext, result.price))
Expand Down Expand Up @@ -660,7 +745,7 @@ export class OpenAIBot implements PayableBot {
ctx
})
this.logger.info(
`streamChatCompletion result = tokens: ${
`streamChatVisionCompletion result = tokens: ${
price.promptTokens + price.completionTokens
} | ${model} | price: ${price.price}¢`
)
Expand Down
9 changes: 8 additions & 1 deletion src/modules/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,21 @@ export interface ImageRequest {
photo?: PhotoSize[] | undefined
photoUrl?: string[]
}

/**
 * A single queued chat request for the ChatGPT request queue.
 * NOTE(review): interface names are conventionally PascalCase (PromptRequest);
 * renaming would touch every consumer (e.g. ChatGptSessionData.requestQueue),
 * so it is left as-is here.
 */
export interface promptRequest {
  prompt: string // user prompt with the command word already stripped
  msgId?: number // id of an existing placeholder message to reuse, if any
  outputFormat?: 'text' | 'voice' // reply medium; treated as 'text' when omitted
  commandPrefix?: string // presumably the original command prefix for later dispatch — TODO confirm against callers
}
/**
 * Per-chat ChatGPT session state kept in the bot session store.
 */
export interface ChatGptSessionData {
  model: string // active model identifier for this chat
  isEnabled: boolean // whether the ChatGPT module is active for this chat
  isFreePromptChatGroups: boolean // when true, group chats may prompt without payment checks — TODO confirm semantics
  chatConversation: ChatConversation[] // running conversation history sent with each completion
  usage: number // presumably accumulated token usage — verify against billing code
  price: number // presumably accumulated cost in cents — verify against billing code
  requestQueue: promptRequest[] // pending prompts; drained by onChatRequestHandler
  isProcessingQueue: boolean // guard flag so only one queue-drain loop runs at a time
}

Expand Down
Loading

0 comments on commit 50d9f79

Please sign in to comment.