From 0844a77868b5ce0f0e3cc05e7dacaeaeaa4e13f2 Mon Sep 17 00:00:00 2001 From: fegloff Date: Sun, 14 Jan 2024 19:13:21 -0500 Subject: [PATCH 1/5] add first vision logic --- src/modules/open-ai/api/openAi.ts | 115 ++++++++++++++++++++++++++++-- src/modules/open-ai/index.ts | 78 ++++++++++++++++++-- src/modules/types.ts | 2 +- 3 files changed, 182 insertions(+), 13 deletions(-) diff --git a/src/modules/open-ai/api/openAi.ts b/src/modules/open-ai/api/openAi.ts index d1d431d..a0b46b2 100644 --- a/src/modules/open-ai/api/openAi.ts +++ b/src/modules/open-ai/api/openAi.ts @@ -18,6 +18,7 @@ import { DalleGPTModels } from '../types' import type fs from 'fs' +import { type ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions' const openai = new OpenAI({ apiKey: config.openAiKey }) @@ -48,6 +49,34 @@ export async function postGenerateImg ( return response.data } +export async function imgInquiryWithVision ( + img: string, + prompt: string, + ctx: OnMessageContext | OnCallBackQueryData +): Promise { + console.log(img, prompt) + const payLoad = { + model: 'gpt-4-vision-preview', + messages: [ + { + role: 'user', + content: [ + { type: 'text', text: 'What’s in this image?' }, + { + type: 'image_url', + image_url: { url: img } + } + ] + } + ], + max_tokens: 300 + } + console.log('HELLO') + const response = await openai.chat.completions.create(payLoad as unknown as ChatCompletionCreateParamsNonStreaming) + console.log(response.choices[0].message?.content) + return 'hi' +} + export async function alterGeneratedImg ( prompt: string, filePath: string, @@ -177,13 +206,85 @@ export const streamChatCompletion = async ( } }) return completion - // } catch (e) { - // reject(e) - // } - // }) - // } catch (error: any) { - // return await Promise.reject(error) - // } +} +export const streamChatVisionCompletion = async ( + conversation: ChatConversation[], + ctx: OnMessageContext | OnCallBackQueryData, + model = 'gpt-4-vision-preview', + prompt: string, + imgUrl: string, + msgId: number, + limitTokens = true +): Promise => { + let completion = '' + let wordCountMinimum = 2 + const payload = { + model, + messages: [ + { + role: 'user', + content: [ + { type: 'text', text: prompt }, + { + type: 'image_url', + image_url: { url: imgUrl } + } + ] + } + ], + stream: true, + max_tokens: 300 + } + const stream = await openai.chat.completions.create(payload as any) + let wordCount = 0 + if (!ctx.chat?.id) { + throw new Error('Context chat id should not be empty after openAI streaming') + } + for await (const part of stream as any) { + wordCount++ + const chunck = part.choices[0]?.delta?.content + ? part.choices[0]?.delta?.content + : '' + completion += chunck + + if (wordCount > wordCountMinimum) { + if (wordCountMinimum < 64) { + wordCountMinimum *= 2 + } + completion = completion.replaceAll('...', '') + completion += '...' + wordCount = 0 + await ctx.api + .editMessageText(ctx.chat?.id, msgId, completion) + .catch(async (e: any) => { + if (e instanceof GrammyError) { + if (e.error_code !== 400) { + throw e + } else { + logger.error(e) + } + } else { + throw e + } + }) + } + } + completion = completion.replaceAll('...', '') + + await ctx.api + .editMessageText(ctx.chat?.id, msgId, completion) + .catch((e: any) => { + if (e instanceof GrammyError) { + if (e.error_code !== 400) { + throw e + } else { + logger.error(e) + } + } else { + throw e + } + }) + return completion } export async function improvePrompt (promptText: string, model: string): Promise { diff --git a/src/modules/open-ai/index.ts b/src/modules/open-ai/index.ts index 809006a..c96ba90 100644 --- a/src/modules/open-ai/index.ts +++ b/src/modules/open-ai/index.ts @@ -18,7 +18,8 @@ import { getDalleModel, getDalleModelPrice, postGenerateImg, - streamChatCompletion + streamChatCompletion, + streamChatVisionCompletion } from './api/openAi' import { appText } from './utils/text' import { chatService } from '../../database/services' @@ -142,7 +143,7 @@ export class OpenAIBot implements PayableBot { const photo = ctx.message?.photo ?? ctx.message?.reply_to_message?.photo if (photo && ctx.session.openAi.imageGen.isEnabled) { const prompt = ctx.message?.caption ?? ctx.message?.text - if (prompt && !isNaN(+prompt)) { + if (prompt) { // && !isNaN(+prompt) return true } } @@ -161,11 +162,11 @@ export class OpenAIBot implements PayableBot { if (this.isSupportedImageReply(ctx)) { const photo = ctx.message?.photo ?? ctx.message?.reply_to_message?.photo - const prompt = ctx.message?.caption ?? ctx.message?.text + const prompt = ctx.message?.caption ?? ctx.message?.text ?? '' ctx.session.openAi.imageGen.imgRequestQueue.push({ prompt, photo, - command: 'alter' + command: !isNaN(+prompt) ? 'alter' : 'vision' }) if (!ctx.session.openAi.imageGen.isProcessingQueue) { ctx.session.openAi.imageGen.isProcessingQueue = true @@ -556,8 +557,10 @@ export class OpenAIBot implements PayableBot { if (await this.hasBalance(ctx)) { if (img?.command === 'dalle') { await this.onGenImgCmd(img?.prompt, ctx) - } else { + } else if (img?.command === 'alter') { await this.onAlterImage(img?.photo, img?.prompt, ctx) + } else { + await this.onInquiryImage(img?.photo, img?.prompt, ctx) } ctx.chatAction = null } else { @@ -606,6 +609,71 @@ export class OpenAIBot implements PayableBot { } } + // imgInquiryWithVision = async ( + // img: string, + // prompt: string, + // ctx: OnMessageContext | OnCallBackQueryData + // ): Promise => { + // console.log(img, prompt) + // console.log('HELLO') + // const response = await openai.chat.completions.create(payLoad as unknown as ChatCompletionCreateParamsNonStreaming) + // console.log(response.choices[0].message?.content) + // return 'hi' + // } + + onInquiryImage = async (photo: PhotoSize[] | undefined, prompt: string | undefined, ctx: OnMessageContext | OnCallBackQueryData): Promise => { + try { + if (ctx.session.openAi.imageGen.isEnabled) { + const fileId = photo?.pop()?.file_id // with pop() get full image quality + if (!fileId) { + await ctx.reply('Cannot retrieve the image file. Please try again.') + ctx.transient.analytics.actualResponseTime = now() + return + } + const file = await ctx.api.getFile(fileId) + const filePath = `${config.openAi.dalle.telegramFileUrl}${config.telegramBotAuthToken}/${file.file_path}` + const msgId = ( + await ctx.reply('...', { + message_thread_id: + ctx.message?.message_thread_id ?? + ctx.message?.reply_to_message?.message_thread_id + }) + ).message_id + const completion = await streamChatVisionCompletion([], ctx, 'gpt-4-vision-preview', prompt ?? '', filePath, msgId, true) + console.log(completion) + // const inquiry = await imgInquiryWithVision(filePath, prompt ?? '', ctx) + // console.log(inquiry) + // const imgSize = ctx.session.openAi.imageGen.imgSize + // ctx.chatAction = 'upload_photo' + // const imgs = await alterGeneratedImg(prompt ?? '', filePath, ctx, imgSize) + // if (imgs) { + // imgs.map(async (img: any) => { + // if (img?.url) { + // await ctx + // .replyWithPhoto(img.url, { message_thread_id: ctx.message?.message_thread_id }) + // .catch(async (e) => { + // await this.onError( + // ctx, + // e, + // MAX_TRIES, + // 'There was an error while generating the image' + // ) + // }) + // } + // }) + // } + // ctx.chatAction = null + } + } catch (e: any) { + await this.onError( + ctx, + e, + MAX_TRIES, + 'An error occurred while generating the AI edit' + ) + } + } + onAlterImage = async (photo: PhotoSize[] | undefined, prompt: string | undefined, ctx: OnMessageContext | OnCallBackQueryData): Promise => { try { if (ctx.session.openAi.imageGen.isEnabled) { diff --git a/src/modules/types.ts b/src/modules/types.ts index 2d34a5b..928c12b 100644 --- a/src/modules/types.ts +++ b/src/modules/types.ts @@ -47,7 +47,7 @@ export interface ChatConversation { } export interface ImageRequest { - command?: 'dalle' | 'alter' + command?: 'dalle' | 'alter' | 'vision' prompt?: string photo?: PhotoSize[] | undefined } From 939557796a37aecc4fff4ce5c3a90ebf22428c1a Mon Sep 17 00:00:00 2001 From: fegloff Date: Mon, 15 Jan 2024 18:01:01 -0500 Subject: [PATCH 2/5] update vision logic to comply with openai api --- src/modules/llms/index.ts | 4 +- src/modules/open-ai/api/openAi.ts | 13 ++--- src/modules/open-ai/helpers.ts | 4 +- src/modules/open-ai/index.ts | 86 +++++++++++++++++-------------- src/modules/open-ai/types.ts | 8 +++ src/modules/payment/index.ts | 1 - src/modules/types.ts | 9 +++- 7 files changed, 72 insertions(+), 53 deletions(-) diff --git a/src/modules/llms/index.ts b/src/modules/llms/index.ts index 35b381b..a808715 100644 --- a/src/modules/llms/index.ts +++ b/src/modules/llms/index.ts @@ -550,7 +550,7 @@ export class LlmsBot implements PayableBot { await ctx.api.editMessageText( ctx.chat.id, msgId, - response.completion.content + response.completion.content as string ) conversation.push(response.completion) // const price = getPromptPrice(completion, data); @@ -648,7 +648,7 @@ export class LlmsBot implements PayableBot { return } const chat: ChatConversation = { - content: limitPrompt(prompt), + content: limitPrompt(prompt as string), model } if (model === LlmsModelsEnum.BISON) { diff --git a/src/modules/open-ai/api/openAi.ts b/src/modules/open-ai/api/openAi.ts index a0b46b2..333b70c 100644 --- a/src/modules/open-ai/api/openAi.ts +++ b/src/modules/open-ai/api/openAi.ts @@ -18,7 +18,7 @@ import { DalleGPTModels } from '../types' import type fs from 'fs' -import { type ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions' +import { type ChatCompletionMessageParam, type ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions' const openai = new OpenAI({ apiKey: config.openAiKey }) @@ -112,15 +112,12 @@ export async function chatCompletion ( model = config.openAi.chatGpt.model, limitTokens = true ): Promise { - const payload = { + const response = await openai.chat.completions.create({ model, max_tokens: limitTokens ? config.openAi.chatGpt.maxTokens : undefined, temperature: config.openAi.dalle.completions.temperature, - messages: conversation - } - const response = await openai.chat.completions.create( - payload as OpenAI.Chat.CompletionCreateParamsNonStreaming - ) + messages: conversation as ChatCompletionMessageParam[] + }) const chatModel = getChatModel(model) if (response.usage?.prompt_tokens === undefined) { throw new Error('Unknown number of prompt tokens used') @@ -149,7 +146,7 @@ export const streamChatCompletion = async ( let wordCountMinimum = 2 const stream = await openai.chat.completions.create({ model, - messages: conversation as OpenAI.Chat.Completions.CreateChatCompletionRequestMessage[], + messages: conversation as ChatCompletionMessageParam[], // OpenAI.Chat.Completions.CreateChatCompletionRequestMessage[], stream: true, max_tokens: limitTokens ? config.openAi.chatGpt.maxTokens : undefined, temperature: config.openAi.dalle.completions.temperature || 0.8 diff --git a/src/modules/open-ai/helpers.ts b/src/modules/open-ai/helpers.ts index a873124..0e8a1e2 100644 --- a/src/modules/open-ai/helpers.ts +++ b/src/modules/open-ai/helpers.ts @@ -235,8 +235,8 @@ export const hasPrefix = (prompt: string): string => { export const getPromptPrice = (completion: string, data: ChatPayload): { price: number, promptTokens: number, completionTokens: number } => { const { conversation, ctx, model } = data - const prompt = conversation[conversation.length - 1].content - const promptTokens = getTokenNumber(prompt) + const prompt = data.prompt ? data.prompt : conversation[conversation.length - 1].content + const promptTokens = getTokenNumber(prompt as string) const completionTokens = getTokenNumber(completion) const modelPrice = getChatModel(model) const price = diff --git a/src/modules/open-ai/index.ts b/src/modules/open-ai/index.ts index c96ba90..bf76cc0 100644 --- a/src/modules/open-ai/index.ts +++ b/src/modules/open-ai/index.ts @@ -14,12 +14,12 @@ import { } from '../types' import { alterGeneratedImg, + chatCompletion, getChatModel, getDalleModel, getDalleModelPrice, postGenerateImg, - streamChatCompletion, - streamChatVisionCompletion + streamChatCompletion } from './api/openAi' import { appText } from './utils/text' import { chatService } from '../../database/services' @@ -91,7 +91,7 @@ export class OpenAIBot implements PayableBot { try { const priceAdjustment = config.openAi.chatGpt.priceAdjustment const prompts = ctx.match - if (this.isSupportedImageReply(ctx)) { + if (this.isSupportedImageReply(ctx) && !isNaN(+prompts)) { const imageNumber = ctx.message?.caption || ctx.message?.text const imageSize = ctx.session.openAi.imageGen.imgSize const model = getDalleModel(imageSize) @@ -609,18 +609,6 @@ export class OpenAIBot implements PayableBot { } } - // imgInquiryWithVision = async ( - // img: string, - // prompt: string, - // ctx: OnMessageContext | OnCallBackQueryData - // ): Promise => { - // console.log(img, prompt) - // console.log('HELLO') - // const response = await openai.chat.completions.create(payLoad as unknown as ChatCompletionCreateParamsNonStreaming) - // console.log(response.choices[0].message?.content) - // return 'hi' - // } - onInquiryImage = async (photo: PhotoSize[] | undefined, prompt: string | undefined, ctx: OnMessageContext | OnCallBackQueryData): Promise => { try { if (ctx.session.openAi.imageGen.isEnabled) { @@ -639,30 +627,50 @@ export class OpenAIBot implements PayableBot { ctx.message?.reply_to_message?.message_thread_id }) ).message_id - const completion = await streamChatVisionCompletion([], ctx, 'gpt-4-vision-preview', prompt ?? '', filePath, msgId, true) - console.log(completion) - // const inquiry = await imgInquiryWithVision(filePath, prompt ?? '', ctx) - // console.log(inquiry) - // const imgSize = ctx.session.openAi.imageGen.imgSize - // ctx.chatAction = 'upload_photo' - // const imgs = await alterGeneratedImg(prompt ?? '', filePath, ctx, imgSize) - // if (imgs) { - // imgs.map(async (img: any) => { - // if (img?.url) { - // await ctx - // .replyWithPhoto(img.url, { message_thread_id: ctx.message?.message_thread_id }) - // .catch(async (e) => { - // await this.onError( - // ctx, - // e, - // MAX_TRIES, - // 'There was an error while generating the image' - // ) - // }) - // } - // }) - // } - // ctx.chatAction = null + const messages = [ + { + role: 'user', + content: [ + { type: 'text', text: prompt }, + { + type: 'image_url', + image_url: { url: filePath } + } + ] + } + ] + const model = ChatGPTModelsEnum.GPT_4_VISION_PREVIEW + const completion = await chatCompletion(messages as any, model, true) + if (completion) { + await ctx.api + .editMessageText(`${ctx.chat?.id}`, msgId, completion.completion) + .catch(async (e: any) => { + await this.onError( + ctx, + e, + MAX_TRIES, + 'An error occurred while generating the AI edit' + ) + }) + ctx.transient.analytics.sessionState = RequestState.Success + ctx.transient.analytics.actualResponseTime = now() + const price = getPromptPrice(completion.completion, { + conversation: [], + prompt, + model, + ctx + }) + this.logger.info( + `streamChatCompletion result = tokens: ${ + price.promptTokens + price.completionTokens + } | ${model} | price: ${price.price}¢` + ) + if ( + !(await this.payments.pay(ctx as OnMessageContext, price.price)) + ) { + await this.onNotBalanceMessage(ctx) + } + } } } catch (e: any) { await this.onError( diff --git a/src/modules/open-ai/types.ts b/src/modules/open-ai/types.ts index 46a60c9..3feb117 100644 --- a/src/modules/open-ai/types.ts +++ b/src/modules/open-ai/types.ts @@ -16,6 +16,7 @@ export enum ChatGPTModelsEnum { GPT_4_32K = 'gpt-4-32k', GPT_35_TURBO = 'gpt-3.5-turbo', GPT_35_TURBO_16K = 'gpt-3.5-turbo-16k', + GPT_4_VISION_PREVIEW = 'gpt-4-vision-preview' } export const ChatGPTModels: Record = { @@ -46,6 +47,13 @@ export const ChatGPTModels: Record = { outputPrice: 0.004, maxContextTokens: 16000, chargeType: 'TOKEN' + }, + 'gpt-4-vision-preview': { + name: 'gpt-4-vision-preview', + inputPrice: 0.03, + outputPrice: 0.06, + maxContextTokens: 16000, + chargeType: 'TOKEN' } } diff --git a/src/modules/payment/index.ts b/src/modules/payment/index.ts index 0b8ba85..5f85568 100644 --- a/src/modules/payment/index.ts +++ b/src/modules/payment/index.ts @@ -388,7 +388,6 @@ export class BotPayments { public async pay (ctx: OnMessageContext, amountUSD: number): Promise { // eslint-disable-next-line @typescript-eslint/naming-convention const { from, message_id, chat } = ctx.update.message - const accountId = this.getAccountId(ctx) const userAccount = this.getUserAccount(accountId) if (!userAccount) { diff --git a/src/modules/types.ts b/src/modules/types.ts index 928c12b..c50cc60 100644 --- a/src/modules/types.ts +++ b/src/modules/types.ts @@ -36,13 +36,20 @@ export interface ChatCompletion { } export interface ChatPayload { conversation: ChatConversation[] + prompt?: string model: string ctx: OnMessageContext | OnCallBackQueryData } + +export interface VisionContent { + type: string + text?: string + image_url?: { url: string } +} export interface ChatConversation { role?: string author?: string - content: string + content: string | [VisionContent] model?: string } From b2690345480e985c31ed144f16ae0ce46b0f69ab Mon Sep 17 00:00:00 2001 From: fegloff Date: Tue, 16 Jan 2024 17:39:32 -0500 Subject: [PATCH 3/5] add stream completion for vision prompt --- src/modules/llms/index.ts | 4 ++-- src/modules/open-ai/api/openAi.ts | 26 +++++++++++++++++++------- src/modules/open-ai/index.ts | 20 +++++--------------- src/modules/types.ts | 2 +- tsconfig.json | 2 +- 5 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/modules/llms/index.ts b/src/modules/llms/index.ts index a808715..fbd0d18 100644 --- a/src/modules/llms/index.ts +++ b/src/modules/llms/index.ts @@ -629,7 +629,7 @@ export class LlmsBot implements PayableBot { while (ctx.session.llms.requestQueue.length > 0) { try { const msg = ctx.session.llms.requestQueue.shift() - const prompt = msg?.content + const prompt = msg?.content as string const model = msg?.model const { chatConversation } = ctx.session.llms if (await this.hasBalance(ctx)) { @@ -648,7 +648,7 @@ export class LlmsBot implements PayableBot { return } const chat: ChatConversation = { - content: limitPrompt(prompt as string), + content: limitPrompt(prompt), model } if (model === LlmsModelsEnum.BISON) { diff --git a/src/modules/open-ai/api/openAi.ts b/src/modules/open-ai/api/openAi.ts index 333b70c..e094016 100644 --- a/src/modules/open-ai/api/openAi.ts +++ b/src/modules/open-ai/api/openAi.ts @@ -1,10 +1,10 @@ import OpenAI from 'openai' import { encode } from 'gpt-tokenizer' import { GrammyError } from 'grammy' - import config from '../../../config' import { deleteFile, getImage } from '../utils/file' import { + // type VisionContent, type ChatCompletion, type ChatConversation, type OnCallBackQueryData, @@ -15,10 +15,12 @@ import { type ChatModel, ChatGPTModels, type DalleGPTModel, - DalleGPTModels + DalleGPTModels, + ChatGPTModelsEnum } from '../types' import type fs from 'fs' import { type ChatCompletionMessageParam, type ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions' +import { type Stream } from 'openai/streaming' const openai = new OpenAI({ apiKey: config.openAiKey }) @@ -204,10 +206,20 @@ export const streamChatCompletion = async ( }) return completion } + +// interface VisionCompletionBody { +// model: string +// messages: Array<{ +// role: string +// content: string | VisionContent[] +// }> +// stream: boolean +// max_tokens: number | undefined +// } export const streamChatVisionCompletion = async ( conversation: ChatConversation[], ctx: OnMessageContext | OnCallBackQueryData, - model = 'gpt-4-vision-preview', + model = ChatGPTModelsEnum.GPT_4_VISION_PREVIEW, prompt: string, imgUrl: string, msgId: number, @@ -215,7 +227,7 @@ export const streamChatVisionCompletion = async ( ): Promise => { let completion = '' let wordCountMinimum = 2 - const payload = { + const payload: any = { model, messages: [ { @@ -230,14 +242,14 @@ export const streamChatVisionCompletion = async ( } ], stream: true, - max_tokens: 300 + max_tokens: limitTokens ? config.openAi.chatGpt.maxTokens : undefined } - const stream = await openai.chat.completions.create(payload as any) + const stream = await openai.chat.completions.create(payload) as unknown as Stream let wordCount = 0 if (!ctx.chat?.id) { throw new Error('Context chat id should not be empty after openAI streaming') } - for await (const part of stream as any) { + for await (const part of stream) { wordCount++ const chunck = part.choices[0]?.delta?.content ? part.choices[0]?.delta?.content diff --git a/src/modules/open-ai/index.ts b/src/modules/open-ai/index.ts index bf76cc0..805528e 100644 --- a/src/modules/open-ai/index.ts +++ b/src/modules/open-ai/index.ts @@ -14,12 +14,12 @@ import { } from '../types' import { alterGeneratedImg, - chatCompletion, getChatModel, getDalleModel, getDalleModelPrice, postGenerateImg, - streamChatCompletion + streamChatCompletion, + streamChatVisionCompletion } from './api/openAi' import { appText } from './utils/text' import { chatService } from '../../database/services' @@ -578,7 +578,7 @@ export class OpenAIBot implements PayableBot { ctx.chatAction = 'upload_photo' // eslint-disable-next-line @typescript-eslint/naming-convention const { message_id } = await ctx.reply( - 'Generating dalle image...', { message_thread_id: ctx.message?.message_thread_id } + 'Generating image via OpenAI\'s DALL·E 3...', { message_thread_id: ctx.message?.message_thread_id } ) const numImages = ctx.session.openAi.imageGen.numImages const imgSize = ctx.session.openAi.imageGen.imgSize @@ -640,21 +640,11 @@ export class OpenAIBot implements PayableBot { } ] const model = ChatGPTModelsEnum.GPT_4_VISION_PREVIEW - const completion = await chatCompletion(messages as any, model, true) + const completion = await streamChatVisionCompletion(messages, ctx, model, prompt ?? '', filePath, msgId, true) if (completion) { - await ctx.api - .editMessageText(`${ctx.chat?.id}`, msgId, completion.completion) - .catch(async (e: any) => { - await this.onError( - ctx, - e, - MAX_TRIES, - 'An error occurred while generating the AI edit' - ) - }) ctx.transient.analytics.sessionState = RequestState.Success ctx.transient.analytics.actualResponseTime = now() - const price = getPromptPrice(completion.completion, { + const price = getPromptPrice(completion, { conversation: [], prompt, model, diff --git a/src/modules/types.ts b/src/modules/types.ts index c50cc60..5f8b0af 100644 --- a/src/modules/types.ts +++ b/src/modules/types.ts @@ -49,7 +49,7 @@ export interface VisionContent { export interface ChatConversation { role?: string author?: string - content: string | [VisionContent] + content: string | VisionContent[] model?: string } diff --git a/tsconfig.json b/tsconfig.json index 4c6d001..932a5b7 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -4,7 +4,7 @@ "module": "CommonJS", "outDir": "dist", "types": ["node"], - "lib": ["es2022"], + "lib": ["es2022"], // , "dom", "dom.iterable"] "target": "es2020", "emitDecoratorMetadata": true, "experimentalDecorators": true, From ec86fcd585b074ca4ee58247ed9eeb5fae1db504 Mon Sep 17 00:00:00 2001 From: fegloff Date: Tue, 16 Jan 2024 17:40:21 -0500 Subject: [PATCH 4/5] minor change --- src/modules/open-ai/api/openAi.ts | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/modules/open-ai/api/openAi.ts b/src/modules/open-ai/api/openAi.ts index e094016..d64957a 100644 --- a/src/modules/open-ai/api/openAi.ts +++ b/src/modules/open-ai/api/openAi.ts @@ -4,7 +4,6 @@ import { GrammyError } from 'grammy' import config from '../../../config' import { deleteFile, getImage } from '../utils/file' import { - // type VisionContent, type ChatCompletion, type ChatConversation, type OnCallBackQueryData, @@ -207,15 +206,6 @@ export const streamChatCompletion = async ( return completion } -// interface VisionCompletionBody { -// model: string -// messages: Array<{ -// role: string -// content: string | VisionContent[] -// }> -// stream: boolean -// max_tokens: number | undefined -// } export const streamChatVisionCompletion = async ( conversation: ChatConversation[], ctx: OnMessageContext | OnCallBackQueryData, From 695f97907d97b9ef418042d610d7145f1a729197 Mon Sep 17 00:00:00 2001 From: fegloff Date: Tue, 16 Jan 2024 19:46:20 -0500 Subject: [PATCH 5/5] add stream completion for vision + add vision command to work with multiple img url --- src/modules/llms/index.ts | 18 +++++----- src/modules/open-ai/api/openAi.ts | 39 +++------------------ src/modules/open-ai/helpers.ts | 17 ++++----- src/modules/open-ai/index.ts | 57 +++++++++++++++++++------------ src/modules/types.ts | 1 + 5 files changed, 60 insertions(+), 72 deletions(-) diff --git a/src/modules/llms/index.ts b/src/modules/llms/index.ts index fbd0d18..4696dda 100644 --- a/src/modules/llms/index.ts +++ b/src/modules/llms/index.ts @@ -85,7 +85,7 @@ export class LlmsBot implements PayableBot { return undefined } - private isSupportedUrlReply (ctx: OnMessageContext | OnCallBackQueryData): string | undefined { + private isSupportedUrlReply (ctx: OnMessageContext | OnCallBackQueryData): string[] | undefined { return getUrlFromText(ctx) } @@ -251,14 +251,16 @@ export class LlmsBot implements PayableBot { async onUrlReplyHandler (ctx: OnMessageContext | OnCallBackQueryData): Promise { try { - const url = getUrlFromText(ctx) ?? '' - const prompt = ctx.message?.text ?? 'summarize' - const collection = ctx.session.collections.activeCollections.find(c => c.url === url) - const newPrompt = `${prompt}` // ${url} - if (collection) { - await this.queryUrlCollection(ctx, url, newPrompt) + const url = getUrlFromText(ctx) + if (url) { + const prompt = ctx.message?.text ?? 'summarize' + const collection = ctx.session.collections.activeCollections.find(c => c.url === url[0]) + const newPrompt = `${prompt}` // ${url} + if (collection) { + await this.queryUrlCollection(ctx, url[0], newPrompt) + } + ctx.transient.analytics.actualResponseTime = now() } - ctx.transient.analytics.actualResponseTime = now() } catch (e: any) { await this.onError(ctx, e) } diff --git a/src/modules/open-ai/api/openAi.ts b/src/modules/open-ai/api/openAi.ts index d64957a..ad34603 100644 --- a/src/modules/open-ai/api/openAi.ts +++ b/src/modules/open-ai/api/openAi.ts @@ -18,7 +18,7 @@ import { ChatGPTModelsEnum } from '../types' import type fs from 'fs' -import { type ChatCompletionMessageParam, type ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions' +import { type ChatCompletionMessageParam } from 'openai/resources/chat/completions' import { type Stream } from 'openai/streaming' const openai = new OpenAI({ apiKey: config.openAiKey }) @@ -50,34 +50,6 @@ export async function postGenerateImg ( return response.data } -export async function imgInquiryWithVision ( - img: string, - prompt: string, - ctx: OnMessageContext | OnCallBackQueryData -): Promise { - console.log(img, prompt) - const payLoad = { - model: 'gpt-4-vision-preview', - messages: [ - { - role: 'user', - content: [ - { type: 'text', text: 'What’s in this image?' }, - { - type: 'image_url', - image_url: { url: img } - } - ] - } - ], - max_tokens: 300 - } - console.log('HELLO') - const response = await openai.chat.completions.create(payLoad as unknown as ChatCompletionCreateParamsNonStreaming) - console.log(response.choices[0].message?.content) - return 'hi' -} - export async function alterGeneratedImg ( prompt: string, filePath: string, @@ -207,11 +179,10 @@ export const streamChatCompletion = async ( } export const streamChatVisionCompletion = async ( - conversation: ChatConversation[], ctx: OnMessageContext | OnCallBackQueryData, model = ChatGPTModelsEnum.GPT_4_VISION_PREVIEW, prompt: string, - imgUrl: string, + imgUrls: string[], msgId: number, limitTokens = true ): Promise => { @@ -224,10 +195,10 @@ export const streamChatVisionCompletion = async ( role: 'user', content: [ { type: 'text', text: prompt }, - { + ...imgUrls.map(img => ({ type: 'image_url', - image_url: { url: imgUrl } - } + image_url: { url: img } + })) ] } ], diff --git a/src/modules/open-ai/helpers.ts b/src/modules/open-ai/helpers.ts index 468be8d..5a858a4 100644 --- a/src/modules/open-ai/helpers.ts +++ b/src/modules/open-ai/helpers.ts @@ -9,7 +9,7 @@ import { isValidUrl } from './utils/web-crawler' export const SupportedCommands = { chat: { name: 'chat' }, ask: { name: 'ask' }, - // sum: { name: 'sum' }, + vision: { name: 'vision' }, ask35: { name: 'ask35' }, new: { name: 'new' }, gpt4: { name: 'gpt4' }, @@ -263,13 +263,14 @@ export const limitPrompt = (prompt: string): string => { return `${prompt} in around ${config.openAi.chatGpt.wordLimit} words` } -export const getUrlFromText = (ctx: OnMessageContext | OnCallBackQueryData): string | undefined => { - const entities = ctx.message?.reply_to_message?.entities - if (entities) { - const urlEntity = entities.find(e => e.type === 'url') - if (urlEntity) { - const url = ctx.message?.reply_to_message?.text?.slice(urlEntity.offset, urlEntity.offset + urlEntity.length) - return url +export const getUrlFromText = (ctx: OnMessageContext | OnCallBackQueryData): string[] | undefined => { + const entities = ctx.message?.entities ? ctx.message?.entities : ctx.message?.reply_to_message?.entities + const text = ctx.message?.text ? ctx.message?.text : ctx.message?.reply_to_message?.text + if (entities && text) { + const urlEntity = entities.filter(e => e.type === 'url') + if (urlEntity.length > 0) { + const urls = urlEntity.map(e => text.slice(e.offset, e.offset + e.length)) + return urls } } return undefined diff --git a/src/modules/open-ai/index.ts b/src/modules/open-ai/index.ts index 805528e..746cc18 100644 --- a/src/modules/open-ai/index.ts +++ b/src/modules/open-ai/index.ts @@ -29,6 +29,7 @@ import { sleep } from '../sd-images/utils' import { getMessageExtras, getPromptPrice, + getUrlFromText, hasChatPrefix, hasDallePrefix, hasNewPrefix, @@ -229,6 +230,24 @@ export class OpenAIBot implements PayableBot { return } + if (ctx.hasCommand(SupportedCommands.vision.name)) { + const photoUrl = getUrlFromText(ctx) + if (photoUrl) { + const prompt = ctx.match + ctx.session.openAi.imageGen.imgRequestQueue.push({ + prompt, + photoUrl, + command: !isNaN(+prompt) ? 'alter' : 'vision' + }) + if (!ctx.session.openAi.imageGen.isProcessingQueue) { + ctx.session.openAi.imageGen.isProcessingQueue = true + await this.onImgRequestHandler(ctx).then(() => { + ctx.session.openAi.imageGen.isProcessingQueue = false + }) + } + } + } + if ( ctx.hasCommand([SupportedCommands.dalle.name, SupportedCommands.dalleImg.name, @@ -560,7 +579,7 @@ export class OpenAIBot implements PayableBot { } else if (img?.command === 'alter') { await this.onAlterImage(img?.photo, img?.prompt, ctx) } else { - await this.onInquiryImage(img?.photo, img?.prompt, ctx) + await this.onInquiryImage(img?.photo, img?.photoUrl, img?.prompt, ctx) } ctx.chatAction = null } else { @@ -609,17 +628,23 @@ export class OpenAIBot implements PayableBot { } } - onInquiryImage = async (photo: PhotoSize[] | undefined, prompt: string | undefined, ctx: OnMessageContext | OnCallBackQueryData): Promise => { + onInquiryImage = async (photo: PhotoSize[] | undefined, photoUrl: string[] | undefined, prompt: string | undefined, ctx: OnMessageContext | OnCallBackQueryData): Promise => { try { if (ctx.session.openAi.imageGen.isEnabled) { - const fileId = photo?.pop()?.file_id // with pop() get full image quality - if (!fileId) { - await ctx.reply('Cannot retrieve the image file. Please try again.') - ctx.transient.analytics.actualResponseTime = now() - return + // let filePath = '' + let imgList = [] + if (photo) { + const fileId = photo?.pop()?.file_id // with pop() get full image quality + if (!fileId) { + await ctx.reply('Cannot retrieve the image file. Please try again.') + ctx.transient.analytics.actualResponseTime = now() + return + } + const file = await ctx.api.getFile(fileId) + imgList.push(`${config.openAi.dalle.telegramFileUrl}${config.telegramBotAuthToken}/${file.file_path}`) + } else { + imgList = photoUrl ?? [] } - const file = await ctx.api.getFile(fileId) - const filePath = `${config.openAi.dalle.telegramFileUrl}${config.telegramBotAuthToken}/${file.file_path}` const msgId = ( await ctx.reply('...', { message_thread_id: @@ -627,20 +652,8 @@ export class OpenAIBot implements PayableBot { ctx.message?.reply_to_message?.message_thread_id }) ).message_id - const messages = [ - { - role: 'user', - content: [ - { type: 'text', text: prompt }, - { - type: 'image_url', - image_url: { url: filePath } - } - ] - } - ] const model = ChatGPTModelsEnum.GPT_4_VISION_PREVIEW - const completion = await streamChatVisionCompletion(messages, ctx, model, prompt ?? '', filePath, msgId, true) + const completion = await streamChatVisionCompletion(ctx, model, prompt ?? '', imgList, msgId, true) if (completion) { ctx.transient.analytics.sessionState = RequestState.Success ctx.transient.analytics.actualResponseTime = now() diff --git a/src/modules/types.ts b/src/modules/types.ts index 5f8b0af..ef9073e 100644 --- a/src/modules/types.ts +++ b/src/modules/types.ts @@ -57,6 +57,7 @@ export interface ImageRequest { command?: 'dalle' | 'alter' | 'vision' prompt?: string photo?: PhotoSize[] | undefined + photoUrl?: string[] } export interface ChatGptSessionData { model: string