From 0844a77868b5ce0f0e3cc05e7dacaeaeaa4e13f2 Mon Sep 17 00:00:00 2001
From: fegloff <fegloff@gmail.com>
Date: Sun, 14 Jan 2024 19:13:21 -0500
Subject: [PATCH 1/5] add first vision logic

---
 src/modules/open-ai/api/openAi.ts | 115 ++++++++++++++++++++++++++++--
 src/modules/open-ai/index.ts      |  78 ++++++++++++++++++--
 src/modules/types.ts              |   2 +-
 3 files changed, 182 insertions(+), 13 deletions(-)
diff --git a/src/modules/open-ai/api/openAi.ts b/src/modules/open-ai/api/openAi.ts
index d1d431d..a0b46b2 100644
--- a/src/modules/open-ai/api/openAi.ts
+++ b/src/modules/open-ai/api/openAi.ts
@@ -18,6 +18,7 @@ import {
   DalleGPTModels
 } from '../types'
 import type fs from 'fs'
+import { type ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions'
 
 const openai = new OpenAI({ apiKey: config.openAiKey })
 
@@ -48,6 +49,34 @@ export async function postGenerateImg (
   return response.data
 }
 
+export async function imgInquiryWithVision (
+  img: string,
+  prompt: string,
+  ctx: OnMessageContext | OnCallBackQueryData
+): Promise<string> {
+  console.log(img, prompt)
+  const payLoad = {
+    model: 'gpt-4-vision-preview',
+    messages: [
+      {
+        role: 'user',
+        content: [
+          { type: 'text', text: 'What’s in this image?' },
+          {
+            type: 'image_url',
+            image_url: { url: img }
+          }
+        ]
+      }
+    ],
+    max_tokens: 300
+  }
+  console.log('HELLO')
+  const response = await openai.chat.completions.create(payLoad as unknown as ChatCompletionCreateParamsNonStreaming)
+  console.log(response.choices[0].message?.content)
+  return 'hi'
+}
+
 export async function alterGeneratedImg (
   prompt: string,
   filePath: string,
@@ -177,13 +206,85 @@ export const streamChatCompletion = async (
       }
     })
   return completion
-  // } catch (e) {
-  //   reject(e)
-  // }
-  //   })
-  // } catch (error: any) {
-  //   return await Promise.reject(error)
-  // }
+}
+export const streamChatVisionCompletion = async (
+  conversation: ChatConversation[],
+  ctx: OnMessageContext | OnCallBackQueryData,
+  model = 'gpt-4-vision-preview',
+  prompt: string,
+  imgUrl: string,
+  msgId: number,
+  limitTokens = true
+): Promise<string> => {
+  let completion = ''
+  let wordCountMinimum = 2
+  const payload = {
+    model,
+    messages: [
+      {
+        role: 'user',
+        content: [
+          { type: 'text', text: prompt },
+          {
+            type: 'image_url',
+            image_url: { url: imgUrl }
+          }
+        ]
+      }
+    ],
+    stream: true,
+    max_tokens: 300
+  }
+  const stream = await openai.chat.completions.create(payload as any)
+  let wordCount = 0
+  if (!ctx.chat?.id) {
+    throw new Error('Context chat id should not be empty after openAI streaming')
+  }
+  for await (const part of stream as any) {
+    wordCount++
+    const chunck = part.choices[0]?.delta?.content
+      ? part.choices[0]?.delta?.content
+      : ''
+    completion += chunck
+
+    if (wordCount > wordCountMinimum) {
+      if (wordCountMinimum < 64) {
+        wordCountMinimum *= 2
+      }
+      completion = completion.replaceAll('...', '')
+      completion += '...'
+      wordCount = 0
+      await ctx.api
+        .editMessageText(ctx.chat?.id, msgId, completion)
+        .catch(async (e: any) => {
+          if (e instanceof GrammyError) {
+            if (e.error_code !== 400) {
+              throw e
+            } else {
+              logger.error(e)
+            }
+          } else {
+            throw e
+          }
+        })
+    }
+  }
+  completion = completion.replaceAll('...', '')
+
+  await ctx.api
+    .editMessageText(ctx.chat?.id, msgId, completion)
+    .catch((e: any) => {
+      if (e instanceof GrammyError) {
+        if (e.error_code !== 400) {
+          throw e
+        } else {
+          logger.error(e)
+        }
+      } else {
+        throw e
+      }
+    })
+  return completion
 }
 
 export async function improvePrompt (promptText: string, model: string): Promise<string> {
diff --git a/src/modules/open-ai/index.ts b/src/modules/open-ai/index.ts
index 809006a..c96ba90 100644
--- a/src/modules/open-ai/index.ts
+++ b/src/modules/open-ai/index.ts
@@ -18,7 +18,8 @@ import {
   getDalleModel,
   getDalleModelPrice,
   postGenerateImg,
-  streamChatCompletion
+  streamChatCompletion,
+  streamChatVisionCompletion
 } from './api/openAi'
 import { appText } from './utils/text'
 import { chatService } from '../../database/services'
@@ -142,7 +143,7 @@ export class OpenAIBot implements PayableBot {
     const photo = ctx.message?.photo ?? ctx.message?.reply_to_message?.photo
     if (photo && ctx.session.openAi.imageGen.isEnabled) {
       const prompt = ctx.message?.caption ?? ctx.message?.text
-      if (prompt && !isNaN(+prompt)) {
+      if (prompt) { // && !isNaN(+prompt)
         return true
       }
     }
@@ -161,11 +162,11 @@ export class OpenAIBot implements PayableBot {
 
     if (this.isSupportedImageReply(ctx)) {
       const photo = ctx.message?.photo ?? ctx.message?.reply_to_message?.photo
-      const prompt = ctx.message?.caption ?? ctx.message?.text
+      const prompt = ctx.message?.caption ?? ctx.message?.text ?? ''
       ctx.session.openAi.imageGen.imgRequestQueue.push({
         prompt,
         photo,
-        command: 'alter'
+        command: !isNaN(+prompt) ? 'alter' : 'vision'
       })
       if (!ctx.session.openAi.imageGen.isProcessingQueue) {
         ctx.session.openAi.imageGen.isProcessingQueue = true
@@ -556,8 +557,10 @@ export class OpenAIBot implements PayableBot {
         if (await this.hasBalance(ctx)) {
           if (img?.command === 'dalle') {
             await this.onGenImgCmd(img?.prompt, ctx)
-          } else {
+          } else if (img?.command === 'alter') {
             await this.onAlterImage(img?.photo, img?.prompt, ctx)
+          } else {
+            await this.onInquiryImage(img?.photo, img?.prompt, ctx)
           }
           ctx.chatAction = null
         } else {
@@ -606,6 +609,71 @@ export class OpenAIBot implements PayableBot {
     }
   }
 
+  // imgInquiryWithVision = async (
+  //   img: string,
+  //   prompt: string,
+  //   ctx: OnMessageContext | OnCallBackQueryData
+  // ): Promise<string> => {
+  //   console.log(img, prompt)
+  //   console.log('HELLO')
+  //   const response = await openai.chat.completions.create(payLoad as unknown as ChatCompletionCreateParamsNonStreaming)
+  //   console.log(response.choices[0].message?.content)
+  //   return 'hi'
+  // }
+
+  onInquiryImage = async (photo: PhotoSize[] | undefined, prompt: string | undefined, ctx: OnMessageContext | OnCallBackQueryData): Promise<void> => {
+    try {
+      if (ctx.session.openAi.imageGen.isEnabled) {
+        const fileId = photo?.pop()?.file_id // with pop() get full image quality
+        if (!fileId) {
+          await ctx.reply('Cannot retrieve the image file. Please try again.')
+          ctx.transient.analytics.actualResponseTime = now()
+          return
+        }
+        const file = await ctx.api.getFile(fileId)
+        const filePath = `${config.openAi.dalle.telegramFileUrl}${config.telegramBotAuthToken}/${file.file_path}`
+        const msgId = (
+          await ctx.reply('...', {
+            message_thread_id:
+              ctx.message?.message_thread_id ??
+              ctx.message?.reply_to_message?.message_thread_id
+          })
+        ).message_id
+        const completion = await streamChatVisionCompletion([], ctx, 'gpt-4-vision-preview', prompt ?? '', filePath, msgId, true)
+        console.log(completion)
+        // const inquiry = await imgInquiryWithVision(filePath, prompt ?? '', ctx)
+        // console.log(inquiry)
+        // const imgSize = ctx.session.openAi.imageGen.imgSize
+        // ctx.chatAction = 'upload_photo'
+        // const imgs = await alterGeneratedImg(prompt ?? '', filePath, ctx, imgSize)
+        // if (imgs) {
+        //   imgs.map(async (img: any) => {
+        //     if (img?.url) {
+        //       await ctx
+        //         .replyWithPhoto(img.url, { message_thread_id: ctx.message?.message_thread_id })
+        //         .catch(async (e) => {
+        //           await this.onError(
+        //             ctx,
+        //             e,
+        //             MAX_TRIES,
+        //             'There was an error while generating the image'
+        //           )
+        //         })
+        //     }
+        //   })
+        // }
+        // ctx.chatAction = null
+      }
+    } catch (e: any) {
+      await this.onError(
+        ctx,
+        e,
+        MAX_TRIES,
+        'An error occurred while generating the AI edit'
+      )
+    }
+  }
+
   onAlterImage = async (photo: PhotoSize[] | undefined, prompt: string | undefined, ctx: OnMessageContext | OnCallBackQueryData): Promise<void> => {
     try {
       if (ctx.session.openAi.imageGen.isEnabled) {
diff --git a/src/modules/types.ts b/src/modules/types.ts
index 2d34a5b..928c12b 100644
--- a/src/modules/types.ts
+++ b/src/modules/types.ts
@@ -47,7 +47,7 @@ export interface ChatConversation {
 }
 
 export interface ImageRequest {
-  command?: 'dalle' | 'alter'
+  command?: 'dalle' | 'alter' | 'vision'
   prompt?: string
   photo?: PhotoSize[] | undefined
 }

From 939557796a37aecc4fff4ce5c3a90ebf22428c1a Mon Sep 17 00:00:00 2001
From: fegloff <fegloff@gmail.com>
Date: Mon, 15 Jan 2024 18:01:01 -0500
Subject: [PATCH 2/5] update vision logic to comply with openai api

---
 src/modules/llms/index.ts         |  4 +-
 src/modules/open-ai/api/openAi.ts | 13 ++---
 src/modules/open-ai/helpers.ts    |  4 +-
 src/modules/open-ai/index.ts      | 86 +++++++++++++++++--------------
 src/modules/open-ai/types.ts      |  8 +++
 src/modules/payment/index.ts      |  1 -
 src/modules/types.ts              |  9 +++-
 7 files changed, 72 insertions(+), 53 deletions(-)

diff --git a/src/modules/llms/index.ts b/src/modules/llms/index.ts
index 35b381b..a808715 100644
--- a/src/modules/llms/index.ts
+++ b/src/modules/llms/index.ts
@@ -550,7 +550,7 @@ export class LlmsBot implements PayableBot {
       await ctx.api.editMessageText(
         ctx.chat.id,
         msgId,
-        response.completion.content
+        response.completion.content as string
       )
       conversation.push(response.completion)
       // const price = getPromptPrice(completion, data);
@@ -648,7 +648,7 @@ export class LlmsBot implements PayableBot {
             return
           }
           const chat: ChatConversation = {
-            content: limitPrompt(prompt),
+            content: limitPrompt(prompt as string),
             model
           }
           if (model === LlmsModelsEnum.BISON) {
diff --git a/src/modules/open-ai/api/openAi.ts b/src/modules/open-ai/api/openAi.ts
index a0b46b2..333b70c 100644
--- a/src/modules/open-ai/api/openAi.ts
+++ b/src/modules/open-ai/api/openAi.ts
@@ -18,7 +18,7 @@ import {
   DalleGPTModels
 } from '../types'
 import type fs from 'fs'
-import { type ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions'
+import { type ChatCompletionMessageParam, type ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions'
 
 const openai = new OpenAI({ apiKey: config.openAiKey })
 
@@ -112,15 +112,12 @@ export async function chatCompletion (
   model = config.openAi.chatGpt.model,
   limitTokens = true
 ): Promise<ChatCompletion> {
-  const payload = {
+  const response = await openai.chat.completions.create({
     model,
     max_tokens: limitTokens ? config.openAi.chatGpt.maxTokens : undefined,
     temperature: config.openAi.dalle.completions.temperature,
-    messages: conversation
-  }
-  const response = await openai.chat.completions.create(
-    payload as OpenAI.Chat.CompletionCreateParamsNonStreaming
-  )
+    messages: conversation as ChatCompletionMessageParam[]
+  })
   const chatModel = getChatModel(model)
   if (response.usage?.prompt_tokens === undefined) {
     throw new Error('Unknown number of prompt tokens used')
@@ -149,7 +146,7 @@ export const streamChatCompletion = async (
   let wordCountMinimum = 2
   const stream = await openai.chat.completions.create({
     model,
-    messages: conversation as OpenAI.Chat.Completions.CreateChatCompletionRequestMessage[],
+    messages: conversation as ChatCompletionMessageParam[], // OpenAI.Chat.Completions.CreateChatCompletionRequestMessage[],
     stream: true,
     max_tokens: limitTokens ? config.openAi.chatGpt.maxTokens : undefined,
     temperature: config.openAi.dalle.completions.temperature || 0.8
diff --git a/src/modules/open-ai/helpers.ts b/src/modules/open-ai/helpers.ts
index a873124..0e8a1e2 100644
--- a/src/modules/open-ai/helpers.ts
+++ b/src/modules/open-ai/helpers.ts
@@ -235,8 +235,8 @@ export const hasPrefix = (prompt: string): string => {
 export const getPromptPrice = (completion: string, data: ChatPayload): { price: number, promptTokens: number, completionTokens: number } => {
   const { conversation, ctx, model } = data
 
-  const prompt = conversation[conversation.length - 1].content
-  const promptTokens = getTokenNumber(prompt)
+  const prompt = data.prompt ? data.prompt : conversation[conversation.length - 1].content
+  const promptTokens = getTokenNumber(prompt as string)
   const completionTokens = getTokenNumber(completion)
   const modelPrice = getChatModel(model)
   const price =
diff --git a/src/modules/open-ai/index.ts b/src/modules/open-ai/index.ts
index c96ba90..bf76cc0 100644
--- a/src/modules/open-ai/index.ts
+++ b/src/modules/open-ai/index.ts
@@ -14,12 +14,12 @@ import {
 } from '../types'
 import {
   alterGeneratedImg,
+  chatCompletion,
   getChatModel,
   getDalleModel,
   getDalleModelPrice,
   postGenerateImg,
-  streamChatCompletion,
-  streamChatVisionCompletion
+  streamChatCompletion
 } from './api/openAi'
 import { appText } from './utils/text'
 import { chatService } from '../../database/services'
@@ -91,7 +91,7 @@ export class OpenAIBot implements PayableBot {
     try {
       const priceAdjustment = config.openAi.chatGpt.priceAdjustment
       const prompts = ctx.match
-      if (this.isSupportedImageReply(ctx)) {
+      if (this.isSupportedImageReply(ctx) && !isNaN(+prompts)) {
         const imageNumber = ctx.message?.caption || ctx.message?.text
         const imageSize = ctx.session.openAi.imageGen.imgSize
         const model = getDalleModel(imageSize)
@@ -609,18 +609,6 @@ export class OpenAIBot implements PayableBot {
     }
   }
 
-  // imgInquiryWithVision = async (
-  //   img: string,
-  //   prompt: string,
-  //   ctx: OnMessageContext | OnCallBackQueryData
-  // ): Promise<string> => {
-  //   console.log(img, prompt)
-  //   console.log('HELLO')
-  //   const response = await openai.chat.completions.create(payLoad as unknown as ChatCompletionCreateParamsNonStreaming)
-  //   console.log(response.choices[0].message?.content)
-  //   return 'hi'
-  // }
-
   onInquiryImage = async (photo: PhotoSize[] | undefined, prompt: string | undefined, ctx: OnMessageContext | OnCallBackQueryData): Promise<void> => {
     try {
       if (ctx.session.openAi.imageGen.isEnabled) {
@@ -639,30 +627,50 @@ export class OpenAIBot implements PayableBot {
               ctx.message?.reply_to_message?.message_thread_id
           })
         ).message_id
-        const completion = await streamChatVisionCompletion([], ctx, 'gpt-4-vision-preview', prompt ?? '', filePath, msgId, true)
-        console.log(completion)
-        // const inquiry = await imgInquiryWithVision(filePath, prompt ?? '', ctx)
-        // console.log(inquiry)
-        // const imgSize = ctx.session.openAi.imageGen.imgSize
-        // ctx.chatAction = 'upload_photo'
-        // const imgs = await alterGeneratedImg(prompt ?? '', filePath, ctx, imgSize)
-        // if (imgs) {
-        //   imgs.map(async (img: any) => {
-        //     if (img?.url) {
-        //       await ctx
-        //         .replyWithPhoto(img.url, { message_thread_id: ctx.message?.message_thread_id })
-        //         .catch(async (e) => {
-        //           await this.onError(
-        //             ctx,
-        //             e,
-        //             MAX_TRIES,
-        //             'There was an error while generating the image'
-        //           )
-        //         })
-        //     }
-        //   })
-        // }
-        // ctx.chatAction = null
+        const messages = [
+          {
+            role: 'user',
+            content: [
+              { type: 'text', text: prompt },
+              {
+                type: 'image_url',
+                image_url: { url: filePath }
+              }
+            ]
+          }
+        ]
+        const model = ChatGPTModelsEnum.GPT_4_VISION_PREVIEW
+        const completion = await chatCompletion(messages as any, model, true)
+        if (completion) {
+          await ctx.api
+            .editMessageText(`${ctx.chat?.id}`, msgId, completion.completion)
+            .catch(async (e: any) => {
+              await this.onError(
+                ctx,
+                e,
+                MAX_TRIES,
+                'An error occurred while generating the AI edit'
+              )
+            })
+          ctx.transient.analytics.sessionState = RequestState.Success
+          ctx.transient.analytics.actualResponseTime = now()
+          const price = getPromptPrice(completion.completion, {
+            conversation: [],
+            prompt,
+            model,
+            ctx
+          })
+          this.logger.info(
+            `streamChatCompletion result = tokens: ${
+                price.promptTokens + price.completionTokens
+            } | ${model} | price: ${price.price}¢`
+          )
+          if (
+            !(await this.payments.pay(ctx as OnMessageContext, price.price))
+          ) {
+            await this.onNotBalanceMessage(ctx)
+          }
+        }
       }
     } catch (e: any) {
       await this.onError(
diff --git a/src/modules/open-ai/types.ts b/src/modules/open-ai/types.ts
index 46a60c9..3feb117 100644
--- a/src/modules/open-ai/types.ts
+++ b/src/modules/open-ai/types.ts
@@ -16,6 +16,7 @@ export enum ChatGPTModelsEnum {
   GPT_4_32K = 'gpt-4-32k',
   GPT_35_TURBO = 'gpt-3.5-turbo',
   GPT_35_TURBO_16K = 'gpt-3.5-turbo-16k',
+  GPT_4_VISION_PREVIEW = 'gpt-4-vision-preview'
 }
 
 export const ChatGPTModels: Record<string, ChatModel> = {
@@ -46,6 +47,13 @@ export const ChatGPTModels: Record<string, ChatModel> = {
     outputPrice: 0.004,
     maxContextTokens: 16000,
     chargeType: 'TOKEN'
+  },
+  'gpt-4-vision-preview': {
+    name: 'gpt-4-vision-preview',
+    inputPrice: 0.03,
+    outputPrice: 0.06,
+    maxContextTokens: 16000,
+    chargeType: 'TOKEN'
   }
 }
 
diff --git a/src/modules/payment/index.ts b/src/modules/payment/index.ts
index 0b8ba85..5f85568 100644
--- a/src/modules/payment/index.ts
+++ b/src/modules/payment/index.ts
@@ -388,7 +388,6 @@ export class BotPayments {
   public async pay (ctx: OnMessageContext, amountUSD: number): Promise<boolean> {
     // eslint-disable-next-line @typescript-eslint/naming-convention
     const { from, message_id, chat } = ctx.update.message
-
     const accountId = this.getAccountId(ctx)
     const userAccount = this.getUserAccount(accountId)
     if (!userAccount) {
diff --git a/src/modules/types.ts b/src/modules/types.ts
index 928c12b..c50cc60 100644
--- a/src/modules/types.ts
+++ b/src/modules/types.ts
@@ -36,13 +36,20 @@ export interface ChatCompletion {
 }
 export interface ChatPayload {
   conversation: ChatConversation[]
+  prompt?: string
   model: string
   ctx: OnMessageContext | OnCallBackQueryData
 }
+
+export interface VisionContent {
+  type: string
+  text?: string
+  image_url?: { url: string }
+}
 export interface ChatConversation {
   role?: string
   author?: string
-  content: string
+  content: string | [VisionContent]
   model?: string
 }
 

From b2690345480e985c31ed144f16ae0ce46b0f69ab Mon Sep 17 00:00:00 2001
From: fegloff <fegloff@gmail.com>
Date: Tue, 16 Jan 2024 17:39:32 -0500
Subject: [PATCH 3/5] add stream completion for vision prompt

---
 src/modules/llms/index.ts         |  4 ++--
 src/modules/open-ai/api/openAi.ts | 26 +++++++++++++++++++-------
 src/modules/open-ai/index.ts      | 20 +++++---------------
 src/modules/types.ts              |  2 +-
 tsconfig.json                     |  2 +-
 5 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/src/modules/llms/index.ts b/src/modules/llms/index.ts
index a808715..fbd0d18 100644
--- a/src/modules/llms/index.ts
+++ b/src/modules/llms/index.ts
@@ -629,7 +629,7 @@ export class LlmsBot implements PayableBot {
     while (ctx.session.llms.requestQueue.length > 0) {
       try {
         const msg = ctx.session.llms.requestQueue.shift()
-        const prompt = msg?.content
+        const prompt = msg?.content as string
         const model = msg?.model
         const { chatConversation } = ctx.session.llms
         if (await this.hasBalance(ctx)) {
@@ -648,7 +648,7 @@ export class LlmsBot implements PayableBot {
             return
           }
           const chat: ChatConversation = {
-            content: limitPrompt(prompt as string),
+            content: limitPrompt(prompt),
             model
           }
           if (model === LlmsModelsEnum.BISON) {
diff --git a/src/modules/open-ai/api/openAi.ts b/src/modules/open-ai/api/openAi.ts
index 333b70c..e094016 100644
--- a/src/modules/open-ai/api/openAi.ts
+++ b/src/modules/open-ai/api/openAi.ts
@@ -1,10 +1,10 @@
 import OpenAI from 'openai'
 import { encode } from 'gpt-tokenizer'
 import { GrammyError } from 'grammy'
-
 import config from '../../../config'
 import { deleteFile, getImage } from '../utils/file'
 import {
+  // type VisionContent,
   type ChatCompletion,
   type ChatConversation,
   type OnCallBackQueryData,
@@ -15,10 +15,12 @@ import {
   type ChatModel,
   ChatGPTModels,
   type DalleGPTModel,
-  DalleGPTModels
+  DalleGPTModels,
+  ChatGPTModelsEnum
 } from '../types'
 import type fs from 'fs'
 import { type ChatCompletionMessageParam, type ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions'
+import { type Stream } from 'openai/streaming'
 
 const openai = new OpenAI({ apiKey: config.openAiKey })
 
@@ -204,10 +206,20 @@ export const streamChatCompletion = async (
     })
   return completion
 }
+
+// interface VisionCompletionBody {
+//   model: string
+//   messages: Array<{
+//     role: string
+//     content: string | VisionContent[]
+//   }>
+//   stream: boolean
+//   max_tokens: number | undefined
+// }
 export const streamChatVisionCompletion = async (
   conversation: ChatConversation[],
   ctx: OnMessageContext | OnCallBackQueryData,
-  model = 'gpt-4-vision-preview',
+  model = ChatGPTModelsEnum.GPT_4_VISION_PREVIEW,
   prompt: string,
   imgUrl: string,
   msgId: number,
@@ -215,7 +227,7 @@ export const streamChatVisionCompletion = async (
 ): Promise<string> => {
   let completion = ''
   let wordCountMinimum = 2
-  const payload = {
+  const payload: any = {
     model,
     messages: [
       {
@@ -230,14 +242,14 @@ export const streamChatVisionCompletion = async (
       }
     ],
     stream: true,
-    max_tokens: 300
+    max_tokens: limitTokens ? config.openAi.chatGpt.maxTokens : undefined
   }
-  const stream = await openai.chat.completions.create(payload as any)
+  const stream = await openai.chat.completions.create(payload) as unknown as Stream<OpenAI.Chat.Completions.ChatCompletionChunk>
   let wordCount = 0
   if (!ctx.chat?.id) {
     throw new Error('Context chat id should not be empty after openAI streaming')
   }
-  for await (const part of stream as any) {
+  for await (const part of stream) {
     wordCount++
     const chunck = part.choices[0]?.delta?.content
       ? part.choices[0]?.delta?.content
diff --git a/src/modules/open-ai/index.ts b/src/modules/open-ai/index.ts
index bf76cc0..805528e 100644
--- a/src/modules/open-ai/index.ts
+++ b/src/modules/open-ai/index.ts
@@ -14,12 +14,12 @@ import {
 } from '../types'
 import {
   alterGeneratedImg,
-  chatCompletion,
   getChatModel,
   getDalleModel,
   getDalleModelPrice,
   postGenerateImg,
-  streamChatCompletion
+  streamChatCompletion,
+  streamChatVisionCompletion
 } from './api/openAi'
 import { appText } from './utils/text'
 import { chatService } from '../../database/services'
@@ -578,7 +578,7 @@ export class OpenAIBot implements PayableBot {
         ctx.chatAction = 'upload_photo'
         // eslint-disable-next-line @typescript-eslint/naming-convention
         const { message_id } = await ctx.reply(
-          'Generating dalle image...', { message_thread_id: ctx.message?.message_thread_id }
+          'Generating image via OpenAI\'s DALL·E 3...', { message_thread_id: ctx.message?.message_thread_id }
         )
         const numImages = ctx.session.openAi.imageGen.numImages
         const imgSize = ctx.session.openAi.imageGen.imgSize
@@ -640,21 +640,11 @@ export class OpenAIBot implements PayableBot {
           }
         ]
         const model = ChatGPTModelsEnum.GPT_4_VISION_PREVIEW
-        const completion = await chatCompletion(messages as any, model, true)
+        const completion = await streamChatVisionCompletion(messages, ctx, model, prompt ?? '', filePath, msgId, true)
         if (completion) {
-          await ctx.api
-            .editMessageText(`${ctx.chat?.id}`, msgId, completion.completion)
-            .catch(async (e: any) => {
-              await this.onError(
-                ctx,
-                e,
-                MAX_TRIES,
-                'An error occurred while generating the AI edit'
-              )
-            })
           ctx.transient.analytics.sessionState = RequestState.Success
           ctx.transient.analytics.actualResponseTime = now()
-          const price = getPromptPrice(completion.completion, {
+          const price = getPromptPrice(completion, {
             conversation: [],
             prompt,
             model,
diff --git a/src/modules/types.ts b/src/modules/types.ts
index c50cc60..5f8b0af 100644
--- a/src/modules/types.ts
+++ b/src/modules/types.ts
@@ -49,7 +49,7 @@ export interface VisionContent {
 export interface ChatConversation {
   role?: string
   author?: string
-  content: string | [VisionContent]
+  content: string | VisionContent[]
   model?: string
 }
 
diff --git a/tsconfig.json b/tsconfig.json
index 4c6d001..932a5b7 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -4,7 +4,7 @@
     "module": "CommonJS",
     "outDir": "dist",
     "types": ["node"],
-    "lib": ["es2022"],
+    "lib": ["es2022"], // , "dom", "dom.iterable"]
     "target": "es2020",
     "emitDecoratorMetadata": true,
     "experimentalDecorators": true,

From ec86fcd585b074ca4ee58247ed9eeb5fae1db504 Mon Sep 17 00:00:00 2001
From: fegloff <fegloff@gmail.com>
Date: Tue, 16 Jan 2024 17:40:21 -0500
Subject: [PATCH 4/5] remove commented-out vision type scaffolding

---
 src/modules/open-ai/api/openAi.ts | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/src/modules/open-ai/api/openAi.ts b/src/modules/open-ai/api/openAi.ts
index e094016..d64957a 100644
--- a/src/modules/open-ai/api/openAi.ts
+++ b/src/modules/open-ai/api/openAi.ts
@@ -4,7 +4,6 @@ import { GrammyError } from 'grammy'
 import config from '../../../config'
 import { deleteFile, getImage } from '../utils/file'
 import {
-  // type VisionContent,
   type ChatCompletion,
   type ChatConversation,
   type OnCallBackQueryData,
@@ -207,15 +206,6 @@ export const streamChatCompletion = async (
   return completion
 }
 
-// interface VisionCompletionBody {
-//   model: string
-//   messages: Array<{
-//     role: string
-//     content: string | VisionContent[]
-//   }>
-//   stream: boolean
-//   max_tokens: number | undefined
-// }
 export const streamChatVisionCompletion = async (
   conversation: ChatConversation[],
   ctx: OnMessageContext | OnCallBackQueryData,

From 695f97907d97b9ef418042d610d7145f1a729197 Mon Sep 17 00:00:00 2001
From: fegloff <fegloff@gmail.com>
Date: Tue, 16 Jan 2024 19:46:20 -0500
Subject: [PATCH 5/5] add stream completion for vision + add vision command
 that works with multiple image URLs

---
 src/modules/llms/index.ts         | 18 +++++-----
 src/modules/open-ai/api/openAi.ts | 39 +++------------------
 src/modules/open-ai/helpers.ts    | 17 ++++-----
 src/modules/open-ai/index.ts      | 57 +++++++++++++++++++------------
 src/modules/types.ts              |  1 +
 5 files changed, 60 insertions(+), 72 deletions(-)

diff --git a/src/modules/llms/index.ts b/src/modules/llms/index.ts
index fbd0d18..4696dda 100644
--- a/src/modules/llms/index.ts
+++ b/src/modules/llms/index.ts
@@ -85,7 +85,7 @@ export class LlmsBot implements PayableBot {
     return undefined
   }
 
-  private isSupportedUrlReply (ctx: OnMessageContext | OnCallBackQueryData): string | undefined {
+  private isSupportedUrlReply (ctx: OnMessageContext | OnCallBackQueryData): string[] | undefined {
     return getUrlFromText(ctx)
   }
 
@@ -251,14 +251,16 @@ export class LlmsBot implements PayableBot {
 
   async onUrlReplyHandler (ctx: OnMessageContext | OnCallBackQueryData): Promise<void> {
     try {
-      const url = getUrlFromText(ctx) ?? ''
-      const prompt = ctx.message?.text ?? 'summarize'
-      const collection = ctx.session.collections.activeCollections.find(c => c.url === url)
-      const newPrompt = `${prompt}` // ${url}
-      if (collection) {
-        await this.queryUrlCollection(ctx, url, newPrompt)
+      const url = getUrlFromText(ctx)
+      if (url) {
+        const prompt = ctx.message?.text ?? 'summarize'
+        const collection = ctx.session.collections.activeCollections.find(c => c.url === url[0])
+        const newPrompt = `${prompt}` // ${url}
+        if (collection) {
+          await this.queryUrlCollection(ctx, url[0], newPrompt)
+        }
+        ctx.transient.analytics.actualResponseTime = now()
       }
-      ctx.transient.analytics.actualResponseTime = now()
     } catch (e: any) {
       await this.onError(ctx, e)
     }
diff --git a/src/modules/open-ai/api/openAi.ts b/src/modules/open-ai/api/openAi.ts
index d64957a..ad34603 100644
--- a/src/modules/open-ai/api/openAi.ts
+++ b/src/modules/open-ai/api/openAi.ts
@@ -18,7 +18,7 @@ import {
   ChatGPTModelsEnum
 } from '../types'
 import type fs from 'fs'
-import { type ChatCompletionMessageParam, type ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions'
+import { type ChatCompletionMessageParam } from 'openai/resources/chat/completions'
 import { type Stream } from 'openai/streaming'
 
 const openai = new OpenAI({ apiKey: config.openAiKey })
@@ -50,34 +50,6 @@ export async function postGenerateImg (
   return response.data
 }
 
-export async function imgInquiryWithVision (
-  img: string,
-  prompt: string,
-  ctx: OnMessageContext | OnCallBackQueryData
-): Promise<string> {
-  console.log(img, prompt)
-  const payLoad = {
-    model: 'gpt-4-vision-preview',
-    messages: [
-      {
-        role: 'user',
-        content: [
-          { type: 'text', text: 'What’s in this image?' },
-          {
-            type: 'image_url',
-            image_url: { url: img }
-          }
-        ]
-      }
-    ],
-    max_tokens: 300
-  }
-  console.log('HELLO')
-  const response = await openai.chat.completions.create(payLoad as unknown as ChatCompletionCreateParamsNonStreaming)
-  console.log(response.choices[0].message?.content)
-  return 'hi'
-}
-
 export async function alterGeneratedImg (
   prompt: string,
   filePath: string,
@@ -207,11 +179,10 @@ export const streamChatCompletion = async (
 }
 
 export const streamChatVisionCompletion = async (
-  conversation: ChatConversation[],
   ctx: OnMessageContext | OnCallBackQueryData,
   model = ChatGPTModelsEnum.GPT_4_VISION_PREVIEW,
   prompt: string,
-  imgUrl: string,
+  imgUrls: string[],
   msgId: number,
   limitTokens = true
 ): Promise<string> => {
@@ -224,10 +195,10 @@ export const streamChatVisionCompletion = async (
         role: 'user',
         content: [
           { type: 'text', text: prompt },
-          {
+          ...imgUrls.map(img => ({
             type: 'image_url',
-            image_url: { url: imgUrl }
-          }
+            image_url: { url: img }
+          }))
         ]
       }
     ],
diff --git a/src/modules/open-ai/helpers.ts b/src/modules/open-ai/helpers.ts
index 468be8d..5a858a4 100644
--- a/src/modules/open-ai/helpers.ts
+++ b/src/modules/open-ai/helpers.ts
@@ -9,7 +9,7 @@ import { isValidUrl } from './utils/web-crawler'
 export const SupportedCommands = {
   chat: { name: 'chat' },
   ask: { name: 'ask' },
-  // sum: { name: 'sum' },
+  vision: { name: 'vision' },
   ask35: { name: 'ask35' },
   new: { name: 'new' },
   gpt4: { name: 'gpt4' },
@@ -263,13 +263,14 @@ export const limitPrompt = (prompt: string): string => {
   return `${prompt} in around ${config.openAi.chatGpt.wordLimit} words`
 }
 
-export const getUrlFromText = (ctx: OnMessageContext | OnCallBackQueryData): string | undefined => {
-  const entities = ctx.message?.reply_to_message?.entities
-  if (entities) {
-    const urlEntity = entities.find(e => e.type === 'url')
-    if (urlEntity) {
-      const url = ctx.message?.reply_to_message?.text?.slice(urlEntity.offset, urlEntity.offset + urlEntity.length)
-      return url
+export const getUrlFromText = (ctx: OnMessageContext | OnCallBackQueryData): string[] | undefined => {
+  // Entity offsets index into their own message's text, so text and entities are always read from the same message.
+  // The current message is tried first; the replied-to message is the fallback when the current one has no URL entity.
+  for (const msg of [ctx.message, ctx.message?.reply_to_message]) {
+    const text = msg?.text ?? ''
+    const urls = (msg?.entities ?? []).filter(e => e.type === 'url').map(e => text.slice(e.offset, e.offset + e.length))
+    if (urls.length > 0) {
+      return urls
     }
   }
   return undefined
diff --git a/src/modules/open-ai/index.ts b/src/modules/open-ai/index.ts
index 805528e..746cc18 100644
--- a/src/modules/open-ai/index.ts
+++ b/src/modules/open-ai/index.ts
@@ -29,6 +29,7 @@ import { sleep } from '../sd-images/utils'
 import {
   getMessageExtras,
   getPromptPrice,
+  getUrlFromText,
   hasChatPrefix,
   hasDallePrefix,
   hasNewPrefix,
@@ -229,6 +230,24 @@ export class OpenAIBot implements PayableBot {
       return
     }
 
+    if (ctx.hasCommand(SupportedCommands.vision.name)) {
+      // /vision <prompt>: queue an image inquiry for the URLs returned by getUrlFromText.
+      const photoUrl = getUrlFromText(ctx)
+      if (photoUrl) {
+        // Only URLs are queued here and the 'alter' path consumes `photo`, so the request is always 'vision'.
+        ctx.session.openAi.imageGen.imgRequestQueue.push({ prompt: ctx.match, photoUrl, command: 'vision' })
+        if (!ctx.session.openAi.imageGen.isProcessingQueue) {
+          ctx.session.openAi.imageGen.isProcessingQueue = true
+          try {
+            await this.onImgRequestHandler(ctx)
+          } finally {
+            // Release the flag even when the handler rejects; otherwise the queue stays blocked.
+            ctx.session.openAi.imageGen.isProcessingQueue = false
+          }
+        }
+      }
+    }
+
     if (
       ctx.hasCommand([SupportedCommands.dalle.name,
         SupportedCommands.dalleImg.name,
@@ -560,7 +579,7 @@ export class OpenAIBot implements PayableBot {
           } else if (img?.command === 'alter') {
             await this.onAlterImage(img?.photo, img?.prompt, ctx)
           } else {
-            await this.onInquiryImage(img?.photo, img?.prompt, ctx)
+            await this.onInquiryImage(img?.photo, img?.photoUrl, img?.prompt, ctx)
           }
           ctx.chatAction = null
         } else {
@@ -609,17 +628,23 @@ export class OpenAIBot implements PayableBot {
     }
   }
 
-  onInquiryImage = async (photo: PhotoSize[] | undefined, prompt: string | undefined, ctx: OnMessageContext | OnCallBackQueryData): Promise<void> => {
+  onInquiryImage = async (photo: PhotoSize[] | undefined, photoUrl: string[] | undefined, prompt: string | undefined, ctx: OnMessageContext | OnCallBackQueryData): Promise<void> => {
     try {
       if (ctx.session.openAi.imageGen.isEnabled) {
-        const fileId = photo?.pop()?.file_id // with pop() get full image quality
-        if (!fileId) {
-          await ctx.reply('Cannot retrieve the image file. Please try again.')
-          ctx.transient.analytics.actualResponseTime = now()
-          return
+        // let filePath = ''
+        let imgList = []
+        if (photo) {
+          const fileId = photo?.pop()?.file_id // with pop() get full image quality
+          if (!fileId) {
+            await ctx.reply('Cannot retrieve the image file. Please try again.')
+            ctx.transient.analytics.actualResponseTime = now()
+            return
+          }
+          const file = await ctx.api.getFile(fileId)
+          imgList.push(`${config.openAi.dalle.telegramFileUrl}${config.telegramBotAuthToken}/${file.file_path}`)
+        } else {
+          imgList = photoUrl ?? []
         }
-        const file = await ctx.api.getFile(fileId)
-        const filePath = `${config.openAi.dalle.telegramFileUrl}${config.telegramBotAuthToken}/${file.file_path}`
         const msgId = (
           await ctx.reply('...', {
             message_thread_id:
@@ -627,20 +652,8 @@ export class OpenAIBot implements PayableBot {
               ctx.message?.reply_to_message?.message_thread_id
           })
         ).message_id
-        const messages = [
-          {
-            role: 'user',
-            content: [
-              { type: 'text', text: prompt },
-              {
-                type: 'image_url',
-                image_url: { url: filePath }
-              }
-            ]
-          }
-        ]
         const model = ChatGPTModelsEnum.GPT_4_VISION_PREVIEW
-        const completion = await streamChatVisionCompletion(messages, ctx, model, prompt ?? '', filePath, msgId, true)
+        const completion = await streamChatVisionCompletion(ctx, model, prompt ?? '', imgList, msgId, true)
         if (completion) {
           ctx.transient.analytics.sessionState = RequestState.Success
           ctx.transient.analytics.actualResponseTime = now()
diff --git a/src/modules/types.ts b/src/modules/types.ts
index 5f8b0af..ef9073e 100644
--- a/src/modules/types.ts
+++ b/src/modules/types.ts
@@ -57,6 +57,7 @@ export interface ImageRequest {
   command?: 'dalle' | 'alter' | 'vision'
   prompt?: string
   photo?: PhotoSize[] | undefined
+  photoUrl?: string[]
 }
 export interface ChatGptSessionData {
   model: string