From 049319e6b4b873381fcbebf455807e26e6ccc23a Mon Sep 17 00:00:00 2001
From: fegloff
Date: Tue, 19 Mar 2024 00:28:30 -0500
Subject: [PATCH 1/8] add gemini model

---
 src/modules/llms/api/athropic.ts |  2 +-
 src/modules/llms/api/vertex.ts   | 93 ++++++++++++++++++++++++++++++--
 src/modules/llms/helpers.ts      |  4 +-
 src/modules/llms/index.ts        | 36 +++++++++----
 src/modules/llms/types.ts        | 10 +++-
 5 files changed, 129 insertions(+), 16 deletions(-)

diff --git a/src/modules/llms/api/athropic.ts b/src/modules/llms/api/athropic.ts
index 9eccd2c..2d7e2b9 100644
--- a/src/modules/llms/api/athropic.ts
+++ b/src/modules/llms/api/athropic.ts
@@ -16,7 +16,7 @@ const logger = pino({
   }
 })
 
-const API_ENDPOINT = config.llms.apiEndpoint // 'http://127.0.0.1:5000' // config.llms.apiEndpoint
+const API_ENDPOINT = config.llms.apiEndpoint // config.llms.apiEndpoint // 'http://127.0.0.1:5000' // config.llms.apiEndpoint
 
 export const anthropicCompletion = async (
   conversation: ChatConversation[],
diff --git a/src/modules/llms/api/vertex.ts b/src/modules/llms/api/vertex.ts
index d63a87d..ca5799f 100644
--- a/src/modules/llms/api/vertex.ts
+++ b/src/modules/llms/api/vertex.ts
@@ -1,9 +1,21 @@
-import axios from 'axios'
+import axios, { type AxiosResponse } from 'axios'
 import config from '../../../config'
-import { type ChatConversation } from '../../types'
+import { type OnMessageContext, type ChatConversation, type OnCallBackQueryData } from '../../types'
 import { type LlmCompletion } from './llmApi'
+import { type Readable } from 'stream'
+import { GrammyError } from 'grammy'
+import { pino } from 'pino'
+import { LlmsModelsEnum } from '../types'
 
-const API_ENDPOINT = config.llms.apiEndpoint // http://localhost:8080' // config.llms.apiEndpoint
+const API_ENDPOINT = config.llms.apiEndpoint // config.llms.apiEndpoint // http://localhost:8080' // config.llms.apiEndpoint
+
+const logger = pino({
+  name: 'Gemini - llmsBot',
+  transport: {
+    target: 'pino-pretty',
+    options: { colorize: true }
+  }
+})
 
 export const vertexCompletion = async (
   conversation: ChatConversation[],
@@ -35,3 +47,78 @@
     price: 0
   }
 }
+
+export const vertexStreamCompletion = async (
+  conversation: ChatConversation[],
+  model = LlmsModelsEnum.GEMINI,
+  ctx: OnMessageContext | OnCallBackQueryData,
+  msgId: number,
+  limitTokens = true
+): Promise<LlmCompletion> => {
+  const data = {
+    model,
+    stream: true, // Set stream to true to receive the completion as a stream
+    system: config.openAi.chatGpt.chatCompletionContext,
+    max_tokens: limitTokens ? +config.openAi.chatGpt.maxTokens : undefined,
+    messages: conversation.map(m => { return { parts: { text: m.content }, role: m.role !== 'user' ? 'model' : 'user' } })
+  }
+  const url = `${API_ENDPOINT}/vertex/completions/gemini`
+  if (!ctx.chat?.id) {
+    throw new Error('Context chat id should not be empty before Vertex streaming')
+  }
+  const response: AxiosResponse = await axios.post(url, data, { responseType: 'stream' })
+  // Create a Readable stream from the response
+  const completionStream: Readable = response.data
+  // Read and process the stream
+  let completion = ''
+  for await (const chunk of completionStream) {
+    const msg = chunk.toString()
+    if (msg) {
+      completion += msg.split('Text: ')[1]
+      completion = completion.replaceAll('...', '')
+      completion += '...'
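+      // the trailing '...' marks the message as still streaming; it is
+      // stripped before each append and removed for good after the loop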
+ if (ctx.chat?.id) { + await ctx.api + .editMessageText(ctx.chat?.id, msgId, completion) + .catch(async (e: any) => { + if (e instanceof GrammyError) { + if (e.error_code !== 400) { + throw e + } else { + logger.error(e) + } + } else { + throw e + } + }) + } + } + } + completion = completion.replaceAll('...', '') + await ctx.api + .editMessageText(ctx.chat?.id, msgId, completion) + .catch((e: any) => { + if (e instanceof GrammyError) { + if (e.error_code !== 400) { + throw e + } else { + logger.error(e) + } + } else { + throw e + } + }) + const totalOutputTokens = '10' // response.headers['x-openai-output-tokens'] + const totalInputTokens = '10' // response.headers['x-openai-input-tokens'] + return { + completion: { + content: completion, + role: 'assistant', + model + }, + usage: parseInt(totalOutputTokens, 10) + parseInt(totalInputTokens, 10), + price: 0, + inputTokens: parseInt(totalInputTokens, 10), + outputTokens: parseInt(totalOutputTokens, 10) + } +} diff --git a/src/modules/llms/helpers.ts b/src/modules/llms/helpers.ts index 9d82dd7..43859bc 100644 --- a/src/modules/llms/helpers.ts +++ b/src/modules/llms/helpers.ts @@ -26,7 +26,9 @@ export enum SupportedCommands { j2Ultra = 'j2-ultra', sum = 'sum', ctx = 'ctx', - pdf = 'pdf' + pdf = 'pdf', + gemini = 'gemini', + gShort = 'g' } export const MAX_TRIES = 3 diff --git a/src/modules/llms/index.ts b/src/modules/llms/index.ts index c6dd8bc..5e55ab8 100644 --- a/src/modules/llms/index.ts +++ b/src/modules/llms/index.ts @@ -32,7 +32,7 @@ import { SupportedCommands } from './helpers' import { getUrlFromText, preparePrompt, sendMessage } from '../open-ai/helpers' -import { vertexCompletion } from './api/vertex' +import { vertexCompletion, vertexStreamCompletion } from './api/vertex' import { type LlmCompletion, llmCompletion, llmCheckCollectionStatus, queryUrlDocument, deleteCollection } from './api/llmApi' import { LlmsModelsEnum } from './types' import * as Sentry from '@sentry/node' @@ -129,6 +129,10 @@ export class LlmsBot implements PayableBot { await this.onChat(ctx, LlmsModelsEnum.BISON) return } + if (ctx.hasCommand(SupportedCommands.gemini) || ctx.hasCommand(SupportedCommands.gShort)) { + await this.onChat(ctx, LlmsModelsEnum.GEMINI) + return + } if (ctx.hasCommand([SupportedCommands.claudeOpus, SupportedCommands.opus, SupportedCommands.opusShort]) || (hasClaudeOpusPrefix(ctx.message?.text ?? '') !== '')) { await this.onChat(ctx, LlmsModelsEnum.CLAUDE_OPUS) return @@ -567,13 +571,23 @@ export class LlmsBot implements PayableBot { if (isTypingEnabled) { ctx.chatAction = 'typing' } - const completion = await anthropicStreamCompletion( - conversation, - model as LlmsModelsEnum, - ctx, - msgId, - true // telegram messages has a character limit - ) + let completion: LlmCompletion + if (model === LlmsModelsEnum.GEMINI) { + completion = await vertexStreamCompletion(conversation, + model as LlmsModelsEnum, + ctx, + msgId, + true // telegram messages has a character limit + ) + } else { + completion = await anthropicStreamCompletion( + conversation, + model as LlmsModelsEnum, + ctx, + msgId, + true // telegram messages has a character limit + ) + } if (isTypingEnabled) { ctx.chatAction = null } @@ -585,7 +599,7 @@ export class LlmsBot implements PayableBot { `streamChatCompletion result = tokens: ${price.promptTokens + price.completionTokens} | ${model} | price: ${price.price}¢` // } ) conversation.push({ - role: 'assistant', + role: model === LlmsModelsEnum.GEMINI ? 'model' : 'assistant', content: completion.completion?.content ?? 
'' }) return { @@ -754,7 +768,7 @@ export class LlmsBot implements PayableBot { ctx } let result: { price: number, chat: ChatConversation[] } = { price: 0, chat: [] } - if (model === LlmsModelsEnum.CLAUDE_OPUS || model === LlmsModelsEnum.CLAUDE_SONNET) { + if (model === LlmsModelsEnum.CLAUDE_OPUS || model === LlmsModelsEnum.CLAUDE_SONNET || model === LlmsModelsEnum.GEMINI) { result = await this.completionGen(payload) // , prompt.msgId, prompt.outputFormat) } else { result = await this.promptGen(payload) @@ -816,6 +830,7 @@ export class LlmsBot implements PayableBot { Sentry.setContext('llms', { retryCount, msg }) Sentry.captureException(e) ctx.chatAction = null + console.log('FCO', e) if (retryCount === 0) { // Retry limit reached, log an error or take alternative action this.logger.error(`Retry limit reached for error: ${e}`) @@ -873,6 +888,7 @@ export class LlmsBot implements PayableBot { ctx.transient.analytics.actualResponseTime = now() } } else if (e instanceof AxiosError) { + this.logger.error(`${e.message}`) await sendMessage(ctx, 'Error handling your request').catch(async (e) => { await this.onError(ctx, e, retryCount - 1) }) diff --git a/src/modules/llms/types.ts b/src/modules/llms/types.ts index 54bb775..b4ffa32 100644 --- a/src/modules/llms/types.ts +++ b/src/modules/llms/types.ts @@ -6,7 +6,8 @@ export enum LlmsModelsEnum { J2_ULTRA = 'j2-ultra', CLAUDE_OPUS = 'claude-3-opus-20240229', CLAUDE_SONNET = 'claude-3-sonnet-20240229', - CLAUDE_HAIKU = 'claude-3-haiku-20240307' + CLAUDE_HAIKU = 'claude-3-haiku-20240307', + GEMINI = 'gemini-1.0-pro' } export const LlmsModels: Record = { @@ -17,6 +18,13 @@ export const LlmsModels: Record = { maxContextTokens: 8192, chargeType: 'CHAR' }, + 'gemini-1.0-pro': { + name: 'gemini-1.0-pro', + inputPrice: 0.00025, // 3.00 (1M Tokens) => 0.003 (1K tokens) + outputPrice: 0.00125, + maxContextTokens: 4096, + chargeType: 'CHAR' + }, 'gpt-4-32k': { name: 'gpt-4-32k', inputPrice: 0.06, // 6 From 491c9d679565b09b866a9aee1a6df74ab8e88731 Mon Sep 17 00:00:00 2001 From: fegloff Date: Tue, 19 Mar 2024 00:29:10 -0500 Subject: [PATCH 2/8] clean flags --- src/modules/llms/index.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/modules/llms/index.ts b/src/modules/llms/index.ts index 5e55ab8..b139f93 100644 --- a/src/modules/llms/index.ts +++ b/src/modules/llms/index.ts @@ -830,7 +830,6 @@ export class LlmsBot implements PayableBot { Sentry.setContext('llms', { retryCount, msg }) Sentry.captureException(e) ctx.chatAction = null - console.log('FCO', e) if (retryCount === 0) { // Retry limit reached, log an error or take alternative action this.logger.error(`Retry limit reached for error: ${e}`) From 3727b6b9c4815bf19910cf35a0da480d49cf7b62 Mon Sep 17 00:00:00 2001 From: fegloff Date: Thu, 21 Mar 2024 13:08:59 -0500 Subject: [PATCH 3/8] add token count on vertex module --- src/modules/llms/api/vertex.ts | 45 ++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/src/modules/llms/api/vertex.ts b/src/modules/llms/api/vertex.ts index ca5799f..94e2815 100644 --- a/src/modules/llms/api/vertex.ts +++ b/src/modules/llms/api/vertex.ts @@ -7,7 +7,7 @@ import { GrammyError } from 'grammy' import { pino } from 'pino' import { LlmsModelsEnum } from '../types' -const API_ENDPOINT = config.llms.apiEndpoint // config.llms.apiEndpoint // http://localhost:8080' // config.llms.apiEndpoint +const API_ENDPOINT = config.llms.apiEndpoint // config.llms.apiEndpoint // config.llms.apiEndpoint // 'http://127.0.0.1:5000' // 
config.llms.apiEndpoint const logger = pino({ name: 'Gemini - llmsBot', @@ -71,26 +71,34 @@ export const vertexStreamCompletion = async ( const completionStream: Readable = response.data // Read and process the stream let completion = '' + let outputTokens = '' + let inputTokens = '' for await (const chunk of completionStream) { const msg = chunk.toString() if (msg) { - completion += msg.split('Text: ')[1] - completion = completion.replaceAll('...', '') - completion += '...' - if (ctx.chat?.id) { - await ctx.api - .editMessageText(ctx.chat?.id, msgId, completion) - .catch(async (e: any) => { - if (e instanceof GrammyError) { - if (e.error_code !== 400) { - throw e + if (msg.startsWith('Text')) { + completion += msg.split('Text: ')[1] + completion = completion.replaceAll('...', '') + completion += '...' + if (ctx.chat?.id) { + await ctx.api + .editMessageText(ctx.chat?.id, msgId, completion) + .catch(async (e: any) => { + if (e instanceof GrammyError) { + if (e.error_code !== 400) { + throw e + } else { + logger.error(e) + } } else { - logger.error(e) + throw e } - } else { - throw e - } - }) + }) + } + } else if (msg.startsWith('Input Token')) { + const tokenMsg = msg.split('Input Token: ')[1] + inputTokens = tokenMsg.split('Output Tokens: ')[0] + outputTokens = tokenMsg.split('Output Tokens: ')[1] } } } @@ -108,8 +116,9 @@ export const vertexStreamCompletion = async ( throw e } }) - const totalOutputTokens = '10' // response.headers['x-openai-output-tokens'] - const totalInputTokens = '10' // response.headers['x-openai-input-tokens'] + const totalOutputTokens = outputTokens // response.headers['x-openai-output-tokens'] + const totalInputTokens = inputTokens // response.headers['x-openai-input-tokens'] + return { completion: { content: completion, From ed4b76013f6e8d9a302b617e28b541fa83204bc4 Mon Sep 17 00:00:00 2001 From: fegloff Date: Mon, 25 Mar 2024 12:34:52 -0500 Subject: [PATCH 4/8] add gemini pricing logic --- src/modules/llms/api/vertex.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/modules/llms/api/vertex.ts b/src/modules/llms/api/vertex.ts index 94e2815..d535b55 100644 --- a/src/modules/llms/api/vertex.ts +++ b/src/modules/llms/api/vertex.ts @@ -78,6 +78,12 @@ export const vertexStreamCompletion = async ( if (msg) { if (msg.startsWith('Text')) { completion += msg.split('Text: ')[1] + if (msg.includes('Input Token:')) { + const tokenMsg = msg.split('Input Token: ')[1] + inputTokens = tokenMsg.split('Output Tokens: ')[0] + outputTokens = tokenMsg.split('Output Tokens: ')[1] + completion = completion.split('Input Token: ')[0] + } completion = completion.replaceAll('...', '') completion += '...' 
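+          // the proxy may append the token-usage trailer to a text chunk, so it
+          // is split off here before the text is shown to the user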
if (ctx.chat?.id) { @@ -95,10 +101,6 @@ export const vertexStreamCompletion = async ( } }) } - } else if (msg.startsWith('Input Token')) { - const tokenMsg = msg.split('Input Token: ')[1] - inputTokens = tokenMsg.split('Output Tokens: ')[0] - outputTokens = tokenMsg.split('Output Tokens: ')[1] } } } From 003ec3f3caf67242a993479e1e4ec1978b45b6a5 Mon Sep 17 00:00:00 2001 From: fegloff Date: Wed, 27 Mar 2024 01:22:52 -0500 Subject: [PATCH 5/8] normalize llm conversation to work with claude and vertex models --- src/modules/llms/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/llms/index.ts b/src/modules/llms/index.ts index b139f93..b64d905 100644 --- a/src/modules/llms/index.ts +++ b/src/modules/llms/index.ts @@ -599,7 +599,7 @@ export class LlmsBot implements PayableBot { `streamChatCompletion result = tokens: ${price.promptTokens + price.completionTokens} | ${model} | price: ${price.price}¢` // } ) conversation.push({ - role: model === LlmsModelsEnum.GEMINI ? 'model' : 'assistant', + role: 'assistant', content: completion.completion?.content ?? '' }) return { From 4588b4126aca7f9ff9a50d671c3a74a52344e7dd Mon Sep 17 00:00:00 2001 From: fegloff Date: Wed, 27 Mar 2024 15:25:35 -0500 Subject: [PATCH 6/8] add gemini prefix --- src/modules/llms/helpers.ts | 13 ++++++++++++- src/modules/llms/index.ts | 3 ++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/modules/llms/helpers.ts b/src/modules/llms/helpers.ts index 43859bc..ed943a1 100644 --- a/src/modules/llms/helpers.ts +++ b/src/modules/llms/helpers.ts @@ -35,6 +35,7 @@ export const MAX_TRIES = 3 const LLAMA_PREFIX_LIST = ['* '] const BARD_PREFIX_LIST = ['b. ', 'B. '] const CLAUDE_OPUS_PREFIX_LIST = ['c. '] +const GEMINI_PREFIX_LIST = ['g. '] export const isMentioned = ( ctx: OnMessageContext | OnCallBackQueryData @@ -82,6 +83,16 @@ export const hasClaudeOpusPrefix = (prompt: string): string => { return '' } +export const hasGeminiPrefix = (prompt: string): string => { + const prefixList = GEMINI_PREFIX_LIST + for (let i = 0; i < prefixList.length; i++) { + if (prompt.toLocaleLowerCase().startsWith(prefixList[i])) { + return prefixList[i] + } + } + return '' +} + export const hasUrl = ( ctx: OnMessageContext | OnCallBackQueryData, prompt: string @@ -213,7 +224,7 @@ export const sendMessage = async ( export const hasPrefix = (prompt: string): string => { return ( - hasBardPrefix(prompt) || hasLlamaPrefix(prompt) || hasClaudeOpusPrefix(prompt) + hasBardPrefix(prompt) || hasLlamaPrefix(prompt) || hasClaudeOpusPrefix(prompt) || hasGeminiPrefix(prompt) ) } diff --git a/src/modules/llms/index.ts b/src/modules/llms/index.ts index b64d905..28af012 100644 --- a/src/modules/llms/index.ts +++ b/src/modules/llms/index.ts @@ -22,6 +22,7 @@ import { getPromptPrice, hasBardPrefix, hasClaudeOpusPrefix, + hasGeminiPrefix, hasLlamaPrefix, hasPrefix, hasUrl, @@ -129,7 +130,7 @@ export class LlmsBot implements PayableBot { await this.onChat(ctx, LlmsModelsEnum.BISON) return } - if (ctx.hasCommand(SupportedCommands.gemini) || ctx.hasCommand(SupportedCommands.gShort)) { + if (ctx.hasCommand([SupportedCommands.gemini, SupportedCommands.gShort]) || (hasGeminiPrefix(ctx.message?.text ?? 
'') !== '')) { await this.onChat(ctx, LlmsModelsEnum.GEMINI) return } From cbe63e8b526f3286ef7432e812cc5c8abbeca475 Mon Sep 17 00:00:00 2001 From: fegloff Date: Wed, 27 Mar 2024 17:37:38 -0500 Subject: [PATCH 7/8] fix word cut in streaming --- src/modules/llms/api/athropic.ts | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/modules/llms/api/athropic.ts b/src/modules/llms/api/athropic.ts index 2d7e2b9..758a791 100644 --- a/src/modules/llms/api/athropic.ts +++ b/src/modules/llms/api/athropic.ts @@ -88,9 +88,16 @@ export const anthropicStreamCompletion = async ( if (msg) { if (msg.startsWith('Input Token')) { inputTokens = msg.split('Input Token: ')[1] - } else if (msg.startsWith('Text')) { + } else if (msg.startsWith('Output Tokens')) { + outputTokens = msg.split('Output Tokens: ')[1] + } else { wordCount++ - completion += msg.split('Text: ')[1] + completion += msg // .split('Text: ')[1] + if (msg.includes('Output Tokens:')) { + const tokenMsg = msg.split('Output Tokens: ')[1] + outputTokens = tokenMsg.split('Output Tokens: ')[1] + completion = completion.split('Output Tokens: ')[0] + } if (wordCount > wordCountMinimum) { // if (chunck === '.' && wordCount > wordCountMinimum) { if (wordCountMinimum < 64) { wordCountMinimum *= 2 @@ -114,8 +121,6 @@ export const anthropicStreamCompletion = async ( }) } } - } else if (msg.startsWith('Output Tokens')) { - outputTokens = msg.split('Output Tokens: ')[1] } } } From 8d52ff7d7d06f2ff7de7f163e27f1cb561921e7d Mon Sep 17 00:00:00 2001 From: fegloff Date: Wed, 27 Mar 2024 23:59:45 -0500 Subject: [PATCH 8/8] improve gemini stream to avoid word cutting --- src/modules/llms/api/vertex.ts | 46 ++++++++++++++++------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/src/modules/llms/api/vertex.ts b/src/modules/llms/api/vertex.ts index d535b55..372cb95 100644 --- a/src/modules/llms/api/vertex.ts +++ b/src/modules/llms/api/vertex.ts @@ -7,7 +7,7 @@ import { GrammyError } from 'grammy' import { pino } from 'pino' import { LlmsModelsEnum } from '../types' -const API_ENDPOINT = config.llms.apiEndpoint // config.llms.apiEndpoint // config.llms.apiEndpoint // 'http://127.0.0.1:5000' // config.llms.apiEndpoint +const API_ENDPOINT = config.llms.apiEndpoint // config.llms.apiEndpoint // 'http://127.0.0.1:5000' // config.llms.apiEndpoint const logger = pino({ name: 'Gemini - llmsBot', @@ -76,31 +76,29 @@ export const vertexStreamCompletion = async ( for await (const chunk of completionStream) { const msg = chunk.toString() if (msg) { - if (msg.startsWith('Text')) { - completion += msg.split('Text: ')[1] - if (msg.includes('Input Token:')) { - const tokenMsg = msg.split('Input Token: ')[1] - inputTokens = tokenMsg.split('Output Tokens: ')[0] - outputTokens = tokenMsg.split('Output Tokens: ')[1] - completion = completion.split('Input Token: ')[0] - } - completion = completion.replaceAll('...', '') - completion += '...' - if (ctx.chat?.id) { - await ctx.api - .editMessageText(ctx.chat?.id, msgId, completion) - .catch(async (e: any) => { - if (e instanceof GrammyError) { - if (e.error_code !== 400) { - throw e - } else { - logger.error(e) - } - } else { + completion += msg // .split('Text: ')[1] + if (msg.includes('Input Token:')) { + const tokenMsg = msg.split('Input Token: ')[1] + inputTokens = tokenMsg.split('Output Tokens: ')[0] + outputTokens = tokenMsg.split('Output Tokens: ')[1] + completion = completion.split('Input Token: ')[0] + } + completion = completion.replaceAll('...', '') + completion += '...' 
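+      // chunks are appended verbatim rather than split on 'Text: ', so words
+      // that straddle chunk boundaries are no longer cut off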
+ if (ctx.chat?.id) { + await ctx.api + .editMessageText(ctx.chat?.id, msgId, completion) + .catch(async (e: any) => { + if (e instanceof GrammyError) { + if (e.error_code !== 400) { throw e + } else { + logger.error(e) } - }) - } + } else { + throw e + } + }) } } }
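
Taken together, these patches assume the LLM proxy streams plain text chunks and ends the stream with a usage trailer of the form 'Input Token: <n>Output Tokens: <m>', possibly concatenated onto the final text chunk. A minimal TypeScript sketch of that trailer handling, inferred from the splits used in vertex.ts above (the trailer format is an assumption about the proxy, and parseStreamChunk is a hypothetical helper name, not part of the bot's code):

// Sketch only: mirrors the trailer parsing in vertexStreamCompletion.
// Assumes the proxy appends 'Input Token: <n>Output Tokens: <m>' to the
// final chunk; parseStreamChunk is a hypothetical name for illustration.
interface StreamChunkResult {
  text: string           // user-visible completion text in this chunk
  inputTokens?: string   // only set on the chunk that carries the trailer
  outputTokens?: string
}

function parseStreamChunk (msg: string): StreamChunkResult {
  if (msg.includes('Input Token: ')) {
    // everything before the trailer is completion text
    const [text, trailer] = msg.split('Input Token: ')
    // the trailer itself reads '<inputTokens>Output Tokens: <outputTokens>'
    const [inputTokens, outputTokens] = trailer.split('Output Tokens: ')
    return { text, inputTokens, outputTokens }
  }
  return { text: msg }
}

// Example: parseStreamChunk('...and done.Input Token: 42Output Tokens: 128')
// returns { text: '...and done.', inputTokens: '42', outputTokens: '128' }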