add stream completion for vision prompt
fegloff committed Jan 16, 2024
1 parent aaa1bf4 commit b269034
Showing 5 changed files with 28 additions and 26 deletions.
4 changes: 2 additions & 2 deletions src/modules/llms/index.ts
@@ -629,7 +629,7 @@ export class LlmsBot implements PayableBot {
     while (ctx.session.llms.requestQueue.length > 0) {
       try {
         const msg = ctx.session.llms.requestQueue.shift()
-        const prompt = msg?.content
+        const prompt = msg?.content as string
         const model = msg?.model
         const { chatConversation } = ctx.session.llms
         if (await this.hasBalance(ctx)) {
@@ -648,7 +648,7 @@ export class LlmsBot implements PayableBot {
             return
           }
           const chat: ChatConversation = {
-            content: limitPrompt(prompt as string),
+            content: limitPrompt(prompt),
             model
           }
           if (model === LlmsModelsEnum.BISON) {
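The cast moves to the dequeue site because `ChatConversation.content` is widened to `string | VisionContent[]` in this commit (see src/modules/types.ts below), so the text-only LLMs queue asserts the string form once instead of at each use. A minimal sketch of that narrowing, using placeholder types rather than the bot's actual code:

```ts
// Placeholder shapes for illustration; the real definitions live in src/modules/types.ts.
interface VisionContent { type: string }
interface ChatConversation { content: string | VisionContent[], model?: string }

const queue: ChatConversation[] = [{ content: 'hello', model: 'chat-bison' }]
const msg = queue.shift()
// content may be string | VisionContent[]; this queue only ever holds text,
// so the assertion is safe here and limitPrompt(prompt) needs no second cast.
const prompt = msg?.content as string
console.log(prompt.toUpperCase())
```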
26 changes: 19 additions & 7 deletions src/modules/open-ai/api/openAi.ts
@@ -1,10 +1,10 @@
 import OpenAI from 'openai'
 import { encode } from 'gpt-tokenizer'
 import { GrammyError } from 'grammy'

 import config from '../../../config'
 import { deleteFile, getImage } from '../utils/file'
 import {
-  type VisionContent,
+  // type VisionContent,
   type ChatCompletion,
   type ChatConversation,
   type OnCallBackQueryData,
@@ -15,10 +15,12 @@ import {
   type ChatModel,
   ChatGPTModels,
   type DalleGPTModel,
-  DalleGPTModels
+  DalleGPTModels,
+  ChatGPTModelsEnum
 } from '../types'
 import type fs from 'fs'
 import { type ChatCompletionMessageParam, type ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions'
+import { type Stream } from 'openai/streaming'

 const openai = new OpenAI({ apiKey: config.openAiKey })

@@ -204,18 +206,28 @@ export const streamChatCompletion = async (
   })
   return completion
 }
+
+// interface VisionCompletionBody {
+//   model: string
+//   messages: Array<{
+//     role: string
+//     content: string | VisionContent[]
+//   }>
+//   stream: boolean
+//   max_tokens: number | undefined
+// }
 export const streamChatVisionCompletion = async (
   conversation: ChatConversation[],
   ctx: OnMessageContext | OnCallBackQueryData,
-  model = 'gpt-4-vision-preview',
+  model = ChatGPTModelsEnum.GPT_4_VISION_PREVIEW,
   prompt: string,
   imgUrl: string,
   msgId: number,
   limitTokens = true
 ): Promise<string> => {
   let completion = ''
   let wordCountMinimum = 2
-  const payload = {
+  const payload: any = {
     model,
     messages: [
       {
@@ -230,14 +242,14 @@ export const streamChatVisionCompletion = async (
       }
     ],
     stream: true,
-    max_tokens: 300
+    max_tokens: limitTokens ? config.openAi.chatGpt.maxTokens : undefined
   }
-  const stream = await openai.chat.completions.create(payload as any)
+  const stream = await openai.chat.completions.create(payload) as unknown as Stream<OpenAI.Chat.Completions.ChatCompletionChunk>
   let wordCount = 0
   if (!ctx.chat?.id) {
     throw new Error('Context chat id should not be empty after openAI streaming')
   }
-  for await (const part of stream as any) {
+  for await (const part of stream) {
     wordCount++
     const chunck = part.choices[0]?.delta?.content
       ? part.choices[0]?.delta?.content
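For reference, this is the streaming pattern the reworked `streamChatVisionCompletion` follows: a `stream: true` chat completion whose delta chunks are accumulated and periodically flushed back to the user. A self-contained sketch against the `openai` v4 SDK; `onChunk`, the model string, and the token cap here are illustrative stand-ins, not the bot's exact values:

```ts
import OpenAI from 'openai'

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })

async function streamVision (
  prompt: string,
  imgUrl: string,
  onChunk: (textSoFar: string) => Promise<void> // e.g. edit a Telegram message in place
): Promise<string> {
  // With stream: true the SDK returns an async iterable of ChatCompletionChunk.
  const stream = await openai.chat.completions.create({
    model: 'gpt-4-vision-preview',
    stream: true,
    max_tokens: 800, // illustrative cap; the bot takes this from config when limitTokens is set
    messages: [{
      role: 'user',
      content: [
        { type: 'text', text: prompt },
        { type: 'image_url', image_url: { url: imgUrl } }
      ]
    }]
  })
  let completion = ''
  for await (const part of stream) {
    const delta = part.choices[0]?.delta?.content ?? ''
    completion += delta
    if (delta) await onChunk(completion)
  }
  return completion
}
```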
20 changes: 5 additions & 15 deletions src/modules/open-ai/index.ts
@@ -14,12 +14,12 @@ import {
 } from '../types'
 import {
   alterGeneratedImg,
-  chatCompletion,
   getChatModel,
   getDalleModel,
   getDalleModelPrice,
   postGenerateImg,
-  streamChatCompletion
+  streamChatCompletion,
+  streamChatVisionCompletion
 } from './api/openAi'
 import { appText } from './utils/text'
 import { chatService } from '../../database/services'
@@ -578,7 +578,7 @@ export class OpenAIBot implements PayableBot {
       ctx.chatAction = 'upload_photo'
       // eslint-disable-next-line @typescript-eslint/naming-convention
       const { message_id } = await ctx.reply(
-        'Generating dalle image...', { message_thread_id: ctx.message?.message_thread_id }
+        'Generating image via OpenAI\'s DALL·E 3...', { message_thread_id: ctx.message?.message_thread_id }
       )
       const numImages = ctx.session.openAi.imageGen.numImages
       const imgSize = ctx.session.openAi.imageGen.imgSize
@@ -640,21 +640,11 @@ export class OpenAIBot implements PayableBot {
         }
       ]
       const model = ChatGPTModelsEnum.GPT_4_VISION_PREVIEW
-      const completion = await chatCompletion(messages as any, model, true)
+      const completion = await streamChatVisionCompletion(messages, ctx, model, prompt ?? '', filePath, msgId, true)
       if (completion) {
-        await ctx.api
-          .editMessageText(`${ctx.chat?.id}`, msgId, completion.completion)
-          .catch(async (e: any) => {
-            await this.onError(
-              ctx,
-              e,
-              MAX_TRIES,
-              'An error occurred while generating the AI edit'
-            )
-          })
         ctx.transient.analytics.sessionState = RequestState.Success
         ctx.transient.analytics.actualResponseTime = now()
-        const price = getPromptPrice(completion.completion, {
+        const price = getPromptPrice(completion, {
          conversation: [],
          prompt,
          model,
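Since the streaming call already edits the reply as chunks arrive and returns the finished completion as a plain string, the old editMessageText/catch block disappears and `getPromptPrice` receives the string directly. As a hedged sketch of deriving a price from that string with gpt-tokenizer (which openAi.ts already imports); the per-token rates below are invented placeholders, not the bot's real price table:

```ts
import { encode } from 'gpt-tokenizer'

// Token-count-based pricing sketch. Rates are placeholders;
// the bot's real pricing lives behind getPromptPrice and config.
function promptPriceSketch (prompt: string, completion: string): number {
  const inputTokens = encode(prompt).length
  const outputTokens = encode(completion).length
  const usdPer1kIn = 0.01
  const usdPer1kOut = 0.03
  return (inputTokens * usdPer1kIn + outputTokens * usdPer1kOut) / 1000
}

console.log(promptPriceSketch('describe this image', 'A cat sitting on a mat.'))
```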
2 changes: 1 addition & 1 deletion src/modules/types.ts
@@ -49,7 +49,7 @@ export interface VisionContent {
 export interface ChatConversation {
   role?: string
   author?: string
-  content: string | [VisionContent]
+  content: string | VisionContent[]
   model?: string
 }

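This fix replaces the one-element tuple type `[VisionContent]` with the array type `VisionContent[]`, so a message can carry any number of content parts, for example one text part plus one image part. An illustrative value; the `VisionContent` fields shown are assumed to mirror OpenAI's content parts, since only the tuple-to-array change is confirmed by this diff:

```ts
// Assumed shapes for illustration; the real interfaces live in src/modules/types.ts.
interface VisionContent { type: string, text?: string, image_url?: { url: string } }
interface ChatConversation {
  role?: string
  author?: string
  content: string | VisionContent[]
  model?: string
}

const visionMsg: ChatConversation = {
  role: 'user',
  content: [
    { type: 'text', text: 'What is in this photo?' },
    { type: 'image_url', image_url: { url: 'https://example.com/cat.jpg' } }
  ],
  model: 'gpt-4-vision-preview'
}
```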
2 changes: 1 addition & 1 deletion tsconfig.json
@@ -4,7 +4,7 @@
     "module": "CommonJS",
     "outDir": "dist",
     "types": ["node"],
-    "lib": ["es2022"],
+    "lib": ["es2022"], // , "dom", "dom.iterable"]
     "target": "es2020",
     "emitDecoratorMetadata": true,
     "experimentalDecorators": true,
