From cd4f28482c50c245651720fbc216101338c1de5e Mon Sep 17 00:00:00 2001
From: fegloff
Date: Fri, 5 Apr 2024 10:05:30 -0500
Subject: [PATCH] add llmsBase class and vertexBot, claudeBot, llmsBot derived classes

---
 src/bot.ts                    |  17 ++
 src/modules/llms/claudeBot.ts |  96 ++++++++
 src/modules/llms/helpers.ts   |  10 +-
 src/modules/llms/index.ts     |  78 +++----
 src/modules/llms/llmsBase.ts  | 410 ++++++++++++++++++++++++++++++++++
 src/modules/llms/llmsBot.ts   |  77 +++++++
 src/modules/llms/vertexBot.ts |  87 ++++++++
 src/modules/open-ai/index.ts  |   4 +-
 8 files changed, 734 insertions(+), 45 deletions(-)
 create mode 100644 src/modules/llms/claudeBot.ts
 create mode 100644 src/modules/llms/llmsBase.ts
 create mode 100644 src/modules/llms/llmsBot.ts
 create mode 100644 src/modules/llms/vertexBot.ts

diff --git a/src/bot.ts b/src/bot.ts
index 013780f2..4255b4ec 100644
--- a/src/bot.ts
+++ b/src/bot.ts
@@ -27,6 +27,8 @@ import { VoiceMemo } from './modules/voice-memo'
 // import { QRCodeBot } from './modules/qrcode/QRCodeBot'
 // import { SDImagesBot } from './modules/sd-images'
 import { OpenAIBot } from './modules/open-ai'
+import { ClaudeBot } from './modules/llms/claudeBot'
+import { VertexBot } from './modules/llms/vertexBot'
 import { OneCountryBot } from './modules/1country'
 import { WalletConnect } from './modules/walletconnect'
 import { BotPayments } from './modules/payment'
@@ -248,6 +250,8 @@ const walletConnect = new WalletConnect()
 const payments = new BotPayments()
 const schedule = new BotSchedule(bot)
 const openAiBot = new OpenAIBot(payments)
+const claudeBot = new ClaudeBot(payments)
+const vertexBot = new VertexBot(payments)
 const oneCountryBot = new OneCountryBot(payments)
 const translateBot = new TranslateBot()
 const telegramPayments = new TelegramPayments(payments)
@@ -372,6 +376,8 @@ const PayableBots: Record<string, PayableBotConfig> = {
   textToSpeech: { bot: textToSpeechBot },
   voiceToVoiceGPTBot: { bot: voiceToVoiceGPTBot },
   voiceToText: { bot: voiceToTextBot },
+  claudeBot: { bot: claudeBot },
+  vertexBot: { bot: vertexBot },
   openAiBot: {
     enabled: (ctx: OnMessageContext) => ctx.session.openAi.imageGen.isEnabled,
     bot: openAiBot
@@ -467,6 +473,16 @@ const onCallback = async (ctx: OnCallBackQueryData): Promise<void> => {
 //     return
 //   }
 
+  if (vertexBot.isSupportedEvent(ctx)) {
+    await vertexBot.onEvent(ctx)
+    return
+  }
+
+  if (claudeBot.isSupportedEvent(ctx)) {
+    await claudeBot.onEvent(ctx)
+    return
+  }
+
   if (openAiBot.isSupportedEvent(ctx)) {
     await openAiBot.onEvent(ctx, (e) => {
       logger.error(e)
@@ -575,6 +591,7 @@ bot.command('love', async (ctx) => {
 bot.command('stop', async (ctx) => {
   logger.info('/stop command')
   await openAiBot.onStop(ctx as OnMessageContext)
+  await claudeBot.onStop(ctx as OnMessageContext)
   ctx.session.translate.enable = false
   ctx.session.translate.languages = []
   ctx.session.oneCountry.lastDomain = ''
diff --git a/src/modules/llms/claudeBot.ts b/src/modules/llms/claudeBot.ts
new file mode 100644
index 00000000..9c4f468f
--- /dev/null
+++ b/src/modules/llms/claudeBot.ts
@@ -0,0 +1,96 @@
+import { type BotPayments } from '../payment'
+import {
+  type OnMessageContext,
+  type OnCallBackQueryData,
+  type ChatConversation
+} from '../types'
+import {
+  hasClaudeOpusPrefix,
+  isMentioned,
+  SupportedCommands
+} from './helpers'
+import { type LlmCompletion } from './api/llmApi'
+import { LlmsModelsEnum } from './types'
+
+import { anthropicCompletion, anthropicStreamCompletion } from './api/athropic'
+import { LlmsBase } from './llmsBase'
+export class ClaudeBot extends LlmsBase {
+  constructor (payments: BotPayments) {
+    super(payments, 'ClaudeBot')
+  }
+
+  public getEstimatedPrice (ctx: any): number {
+    return 0
+  }
+
+  public isSupportedEvent (
+    ctx: OnMessageContext | OnCallBackQueryData
+  ): boolean {
+    const hasCommand = ctx.hasCommand([SupportedCommands.claudeOpus,
+      SupportedCommands.opus,
+      SupportedCommands.opusShort,
+      SupportedCommands.claudeShort,
+      SupportedCommands.claudeSonnet,
+      SupportedCommands.sonnet,
+      SupportedCommands.sonnetShort,
+      SupportedCommands.claudeHaiku,
+      SupportedCommands.haikuShort])
+    if (isMentioned(ctx)) {
+      return true
+    }
+    const chatPrefix = this.hasPrefix(ctx.message?.text ?? '')
+    if (chatPrefix !== '') {
+      return true
+    }
+    return hasCommand
+  }
+
+  hasPrefix (prompt: string): string {
+    return (
+      hasClaudeOpusPrefix(prompt)
+    )
+  }
+
+  async chatStreamCompletion (
+    conversation: ChatConversation[],
+    model: LlmsModelsEnum,
+    ctx: OnMessageContext | OnCallBackQueryData,
+    msgId: number,
+    limitTokens: boolean): Promise<LlmCompletion> {
+    return await anthropicStreamCompletion(
+      conversation,
+      model as LlmsModelsEnum,
+      ctx,
+      msgId,
+      true // telegram messages have a character limit
+    )
+  }
+
+  async chatCompletion (
+    conversation: ChatConversation[],
+    model: LlmsModelsEnum
+  ): Promise<LlmCompletion> {
+    return await anthropicCompletion(conversation, model)
+  }
+
+  public async onEvent (ctx: OnMessageContext | OnCallBackQueryData): Promise<void> {
+    ctx.transient.analytics.module = this.module
+    const isSupportedEvent = this.isSupportedEvent(ctx)
+    if (!isSupportedEvent && ctx.chat?.type !== 'private') {
+      this.logger.warn(`### unsupported command ${ctx.message?.text}`)
+      return
+    }
+
+    if (ctx.hasCommand([SupportedCommands.claudeOpus, SupportedCommands.opus, SupportedCommands.opusShort, SupportedCommands.claudeShort]) || (hasClaudeOpusPrefix(ctx.message?.text ?? '') !== '')) {
+      await this.onChat(ctx, LlmsModelsEnum.CLAUDE_OPUS, true)
+      return
+    }
+    if (ctx.hasCommand([SupportedCommands.claudeSonnet, SupportedCommands.sonnet, SupportedCommands.sonnetShort])) {
+      await this.onChat(ctx, LlmsModelsEnum.CLAUDE_SONNET, true)
+      return
+    }
+    if (ctx.hasCommand([SupportedCommands.claudeHaiku, SupportedCommands.haikuShort])) {
+      await this.onChat(ctx, LlmsModelsEnum.CLAUDE_HAIKU, false)
+    }
+  }
+}
diff --git a/src/modules/llms/helpers.ts b/src/modules/llms/helpers.ts
index 28d8d9e9..c43218a8 100644
--- a/src/modules/llms/helpers.ts
+++ b/src/modules/llms/helpers.ts
@@ -221,11 +221,11 @@ export const sendMessage = async (
   return await ctx.reply(msg, extras)
 }
 
-export const hasPrefix = (prompt: string): string => {
-  return (
-    hasBardPrefix(prompt) || hasLlamaPrefix(prompt) || hasClaudeOpusPrefix(prompt) || hasGeminiPrefix(prompt)
-  )
-}
+// export const hasPrefix = (prompt: string): string => {
+//   return (
+//     hasBardPrefix(prompt) || hasLlamaPrefix(prompt) || hasClaudeOpusPrefix(prompt) || hasGeminiPrefix(prompt)
+//   )
+// }
 
 export const getPromptPrice = (completion: LlmCompletion, data: ChatPayload, updateSession = true): { price: number, promptTokens: number, completionTokens: number } => {
   const { ctx, model } = data
diff --git a/src/modules/llms/index.ts b/src/modules/llms/index.ts
index 7a345412..34b17997 100644
--- a/src/modules/llms/index.ts
+++ b/src/modules/llms/index.ts
@@ -21,11 +21,7 @@ import {
   addUrlToCollection,
   getMinBalance,
   getPromptPrice,
-  hasBardPrefix,
-  hasClaudeOpusPrefix,
-  hasGeminiPrefix,
   hasLlamaPrefix,
-  hasPrefix,
   hasUrl,
   isMentioned,
   limitPrompt,
@@ -63,6 +59,12 @@ export class LlmsBot implements PayableBot {
     return 0
   }
 
+  hasPrefix (prompt: string): string {
+    return (
+      hasLlamaPrefix(prompt)
+    )
+  }
+
   public isSupportedEvent (
     ctx: OnMessageContext | OnCallBackQueryData
   ): boolean {
@@ -72,7 +74,7 @@ export class LlmsBot implements PayableBot {
     if (isMentioned(ctx)) {
       return true
     }
-    const chatPrefix = hasPrefix(ctx.message?.text ?? '')
+    const chatPrefix = this.hasPrefix(ctx.message?.text ?? '')
     const hasUrl = this.isSupportedUrlReply(ctx)
     const hasPdf = this.isSupportedPdfReply(ctx)
     if (chatPrefix !== '') {
@@ -111,10 +113,10 @@ export class LlmsBot implements PayableBot {
       return
     }
 
-    if (hasBardPrefix(ctx.message?.text ?? '') !== '') {
-      await this.onPrefix(ctx, LlmsModelsEnum.BISON)
-      return
-    }
+    // if (hasBardPrefix(ctx.message?.text ?? '') !== '') {
+    //   await this.onPrefix(ctx, LlmsModelsEnum.BISON)
+    //   return
+    // }
 
     if (hasLlamaPrefix(ctx.message?.text ?? '') !== '') {
       await this.onCurrentCollection(ctx)
@@ -126,30 +128,30 @@ export class LlmsBot implements PayableBot {
       return
     }
 
-    if (ctx.hasCommand(SupportedCommands.bard) || ctx.hasCommand(SupportedCommands.bardF)) {
-      await this.onChat(ctx, LlmsModelsEnum.BISON)
-      return
-    }
-    if (ctx.hasCommand([SupportedCommands.gemini, SupportedCommands.gShort]) || (hasGeminiPrefix(ctx.message?.text ?? '') !== '')) {
-      await this.onChat(ctx, LlmsModelsEnum.GEMINI)
-      return
-    }
-    if (ctx.hasCommand([SupportedCommands.claudeOpus, SupportedCommands.opus, SupportedCommands.opusShort, SupportedCommands.claudeShort]) || (hasClaudeOpusPrefix(ctx.message?.text ?? '') !== '')) {
-      await this.onChat(ctx, LlmsModelsEnum.CLAUDE_OPUS)
-      return
-    }
-    if (ctx.hasCommand([SupportedCommands.claudeSonnet, SupportedCommands.sonnet, SupportedCommands.sonnetShort])) {
-      await this.onChat(ctx, LlmsModelsEnum.CLAUDE_SONNET)
-      return
-    }
-    if (ctx.hasCommand([SupportedCommands.claudeHaiku, SupportedCommands.haikuShort])) {
-      await this.onChat(ctx, LlmsModelsEnum.CLAUDE_HAIKU)
-      return
-    }
-    if (ctx.hasCommand(SupportedCommands.bard) || ctx.hasCommand(SupportedCommands.bardF)) {
-      await this.onChat(ctx, LlmsModelsEnum.BISON)
-      return
-    }
+    // if (ctx.hasCommand(SupportedCommands.bard) || ctx.hasCommand(SupportedCommands.bardF)) {
+    //   await this.onChat(ctx, LlmsModelsEnum.BISON)
+    //   return
+    // }
+    // if (ctx.hasCommand([SupportedCommands.gemini, SupportedCommands.gShort]) || (hasGeminiPrefix(ctx.message?.text ?? '') !== '')) {
+    //   await this.onChat(ctx, LlmsModelsEnum.GEMINI)
+    //   return
+    // }
+    // if (ctx.hasCommand([SupportedCommands.claudeOpus, SupportedCommands.opus, SupportedCommands.opusShort, SupportedCommands.claudeShort]) || (hasClaudeOpusPrefix(ctx.message?.text ?? '') !== '')) {
+    //   await this.onChat(ctx, LlmsModelsEnum.CLAUDE_OPUS)
+    //   return
+    // }
+    // if (ctx.hasCommand([SupportedCommands.claudeSonnet, SupportedCommands.sonnet, SupportedCommands.sonnetShort])) {
+    //   await this.onChat(ctx, LlmsModelsEnum.CLAUDE_SONNET)
+    //   return
+    // }
+    // if (ctx.hasCommand([SupportedCommands.claudeHaiku, SupportedCommands.haikuShort])) {
+    //   await this.onChat(ctx, LlmsModelsEnum.CLAUDE_HAIKU)
+    //   return
+    // }
+    // if (ctx.hasCommand(SupportedCommands.bard) || ctx.hasCommand(SupportedCommands.bardF)) {
+    //   await this.onChat(ctx, LlmsModelsEnum.BISON)
+    //   return
+    // }
 
     if (this.isSupportedUrlReply(ctx)) {
       await this.onUrlReplyHandler(ctx)
@@ -166,10 +168,10 @@ export class LlmsBot implements PayableBot {
       return
     }
 
-    if (ctx.hasCommand(SupportedCommands.j2Ultra)) {
-      await this.onChat(ctx, LlmsModelsEnum.J2_ULTRA)
-      return
-    }
+    // if (ctx.hasCommand(SupportedCommands.j2Ultra)) {
+    //   await this.onChat(ctx, LlmsModelsEnum.J2_ULTRA)
+    //   return
+    // }
 
     if (ctx.hasCommand(SupportedCommands.ctx)) {
       await this.onCurrentCollection(ctx)
@@ -696,7 +698,7 @@ export class LlmsBot implements PayableBot {
       ctx,
       SupportedCommands
     )
-    const prefix = hasPrefix(prompt)
+    const prefix = this.hasPrefix(prompt)
     ctx.session.llms.requestQueue.push({
       content: await preparePrompt(ctx, prompt.slice(prefix.length)),
       model
diff --git a/src/modules/llms/llmsBase.ts b/src/modules/llms/llmsBase.ts
new file mode 100644
index 00000000..90ea5787
--- /dev/null
+++ b/src/modules/llms/llmsBase.ts
@@ -0,0 +1,410 @@
+import { GrammyError } from 'grammy'
+import { type Logger, pino } from 'pino'
+
+import { getCommandNamePrompt } from '../1country/utils'
+import { type BotPayments } from '../payment'
+import {
+  type OnMessageContext,
+  type OnCallBackQueryData,
+  type ChatConversation,
+  type ChatPayload,
+  type PayableBot,
+  RequestState
+} from '../types'
+import { appText } from '../open-ai/utils/text'
+import { chatService } from '../../database/services'
+import config from '../../config'
+import { sleep } from '../sd-images/utils'
+import {
+  getMinBalance,
+  getPromptPrice,
+  limitPrompt,
+  MAX_TRIES,
+  SupportedCommands
+} from './helpers'
+import { preparePrompt, sendMessage } from '../open-ai/helpers'
+import { type LlmCompletion, deleteCollection } from './api/llmApi'
+import * as Sentry from '@sentry/node'
+import { now } from '../../utils/perf'
+import { AxiosError } from 'axios'
+import OpenAI from 'openai'
+import { type LlmsModelsEnum } from './types'
+
+export abstract class LlmsBase implements PayableBot {
+  public module: string
+  protected readonly logger: Logger
+  protected readonly payments: BotPayments
+  protected botSuspended: boolean
+
+  constructor (payments: BotPayments, module: string) {
+    this.module = module
+    this.logger = pino({
+      name: this.module,
+      transport: {
+        target: 'pino-pretty',
+        options: { colorize: true }
+      }
+    })
+    this.botSuspended = false
+    this.payments = payments
+  }
+
+  public abstract onEvent (ctx: OnMessageContext | OnCallBackQueryData, refundCallback: (reason?: string) => void): Promise<void>
+  public abstract isSupportedEvent (
+    ctx: OnMessageContext | OnCallBackQueryData
+  ): boolean
+
+  public abstract getEstimatedPrice (ctx: any): number
+
+  protected abstract chatStreamCompletion (
+    conversation: ChatConversation[],
+    model: LlmsModelsEnum,
+    ctx: OnMessageContext | OnCallBackQueryData,
+    msgId: number,
+    limitTokens: boolean): Promise<LlmCompletion>
+
+  protected abstract chatCompletion (
+    conversation: ChatConversation[],
+    model: LlmsModelsEnum
+  ): Promise<LlmCompletion>
+
+  protected abstract hasPrefix (prompt: string): string
+
+  async onPrefix (ctx: OnMessageContext | OnCallBackQueryData, model: string, stream: boolean): Promise<void> {
+    try {
+      if (this.botSuspended) {
+        ctx.transient.analytics.sessionState = RequestState.Error
+        sendMessage(ctx, 'The bot is suspended').catch(async (e) => { await this.onError(ctx, e) })
+        ctx.transient.analytics.actualResponseTime = now()
+        return
+      }
+      const { prompt } = getCommandNamePrompt(
+        ctx,
+        SupportedCommands
+      )
+      const prefix = this.hasPrefix(prompt)
+      ctx.session.llms.requestQueue.push({
+        content: await preparePrompt(ctx, prompt.slice(prefix.length)),
+        model
+      })
+      if (!ctx.session.llms.isProcessingQueue) {
+        ctx.session.llms.isProcessingQueue = true
+        await this.onChatRequestHandler(ctx, stream).then(() => {
+          ctx.session.llms.isProcessingQueue = false
+        })
+      }
+    } catch (e) {
+      await this.onError(ctx, e)
+    }
+  }
+
+  async onChat (ctx: OnMessageContext | OnCallBackQueryData, model: string, stream: boolean): Promise<void> {
+    try {
+      if (this.botSuspended) {
+        ctx.transient.analytics.sessionState = RequestState.Error
+        sendMessage(ctx, 'The bot is suspended').catch(async (e) => { await this.onError(ctx, e) })
+        ctx.transient.analytics.actualResponseTime = now()
+        return
+      }
+      const prompt = ctx.match ? ctx.match : ctx.message?.text
+      ctx.session.llms.requestQueue.push({
+        model,
+        content: await preparePrompt(ctx, prompt as string)
+      })
+      if (!ctx.session.llms.isProcessingQueue) {
+        ctx.session.llms.isProcessingQueue = true
+        await this.onChatRequestHandler(ctx, stream).then(() => {
+          ctx.session.llms.isProcessingQueue = false
+        })
+      }
+      ctx.transient.analytics.actualResponseTime = now()
+    } catch (e: any) {
+      await this.onError(ctx, e)
+    }
+  }
+
+  async onChatRequestHandler (ctx: OnMessageContext | OnCallBackQueryData, stream: boolean): Promise<void> {
+    while (ctx.session.llms.requestQueue.length > 0) {
+      try {
+        const msg = ctx.session.llms.requestQueue.shift()
+        const prompt = msg?.content as string
+        const model = msg?.model
+        const { chatConversation } = ctx.session.llms
+        const minBalance = await getMinBalance(ctx, msg?.model as LlmsModelsEnum)
+        if (await this.hasBalance(ctx, minBalance)) {
+          if (!prompt) {
+            const msg =
+              chatConversation.length > 0
+                ? `${appText.gptLast}\n_${
+                  chatConversation[chatConversation.length - 1].content
+                }_`
+                : appText.introText
+            ctx.transient.analytics.sessionState = RequestState.Success
+            await sendMessage(ctx, msg, { parseMode: 'Markdown' }).catch(async (e) => {
+              await this.onError(ctx, e)
+            })
+            ctx.transient.analytics.actualResponseTime = now()
+            return
+          }
+          const chat: ChatConversation = {
+            content: limitPrompt(prompt),
+            role: 'user',
+            model
+          }
+          chatConversation.push(chat)
+          const payload = {
+            conversation: chatConversation,
+            model: model ?? config.llms.model,
+            ctx
+          }
+          let result: { price: number, chat: ChatConversation[] } = { price: 0, chat: [] }
+          if (stream) {
+            result = await this.completionGen(payload)
+          } else {
+            result = await this.promptGen(payload)
+          }
+          ctx.session.llms.chatConversation = [...result.chat]
+          if (
+            !(await this.payments.pay(ctx as OnMessageContext, result.price))
+          ) {
+            await this.onNotBalanceMessage(ctx)
+          }
+          ctx.chatAction = null
+        } else {
+          await this.onNotBalanceMessage(ctx)
+        }
+      } catch (e: any) {
+        ctx.session.llms.chatConversation = []
+        await this.onError(ctx, e)
+      }
+    }
+  }
+
+  async onStop (ctx: OnMessageContext | OnCallBackQueryData): Promise<void> {
+    for (const c of ctx.session.collections.activeCollections) {
+      this.logger.info(`Deleting collection ${c.collectionName}`)
+      await deleteCollection(c.collectionName)
+    }
+    ctx.session.collections.activeCollections = []
+    ctx.session.collections.collectionConversation = []
+    ctx.session.collections.collectionRequestQueue = []
+    ctx.session.collections.currentCollection = ''
+    ctx.session.collections.isProcessingQueue = false
+    ctx.session.llms.chatConversation = []
+    ctx.session.llms.usage = 0
+    ctx.session.llms.price = 0
+  }
+
+  private async hasBalance (ctx: OnMessageContext | OnCallBackQueryData, minBalance = +config.llms.minimumBalance): Promise<boolean> {
+    const minBalanceOne = this.payments.toONE(await this.payments.getPriceInONE(minBalance), false)
+    const accountId = this.payments.getAccountId(ctx)
+    const addressBalance = await this.payments.getUserBalance(accountId)
+    const { totalCreditsAmount } = await chatService.getUserCredits(accountId)
+    const balance = addressBalance.plus(totalCreditsAmount)
+    const balanceOne = this.payments.toONE(balance, false).toFixed(2)
+    const isGroupInWhiteList = await this.payments.isGroupInWhitelist(ctx as OnMessageContext)
+    return (
+      +balanceOne > +minBalanceOne ||
+      (this.payments.isUserInWhitelist(ctx.from.id, ctx.from.username)) ||
+      isGroupInWhiteList
+    )
+  }
+
+  private async completionGen (data: ChatPayload, msgId?: number, outputFormat = 'text'): Promise< { price: number, chat: ChatConversation[] }> {
+    const { conversation, ctx, model } = data
+    try {
+      if (!msgId) {
+        ctx.transient.analytics.firstResponseTime = now()
+        msgId = (
+          await ctx.reply('...', {
+            message_thread_id:
+              ctx.message?.message_thread_id ??
+              ctx.message?.reply_to_message?.message_thread_id
+          })
+        ).message_id
+      }
+      if (outputFormat === 'text') {
+        const isTypingEnabled = config.openAi.chatGpt.isTypingEnabled
+        if (isTypingEnabled) {
+          ctx.chatAction = 'typing'
+        }
+        const completion = await this.chatStreamCompletion(conversation,
+          model as LlmsModelsEnum,
+          ctx,
+          msgId,
+          true // telegram messages have a character limit
+        )
+        if (isTypingEnabled) {
+          ctx.chatAction = null
+        }
+        if (completion) {
+          ctx.transient.analytics.sessionState = RequestState.Success
+          ctx.transient.analytics.actualResponseTime = now()
+          const price = getPromptPrice(completion, data)
+          this.logger.info(
+            `streamChatCompletion result = tokens: ${price.promptTokens + price.completionTokens} | ${model} | price: ${price.price}¢`
+          )
+          conversation.push({
+            role: 'assistant',
+            content: completion.completion?.content ?? '',
+            model
+          })
+          return {
+            price: price.price,
+            chat: conversation
+          }
+        }
+      } else {
+        const response = await this.chatCompletion(conversation, model as LlmsModelsEnum)
+        conversation.push({
+          role: 'assistant',
+          content: response.completion?.content ?? '',
+          model
+        })
+        return {
+          price: response.price,
+          chat: conversation
+        }
+      }
+      return {
+        price: 0,
+        chat: conversation
+      }
+    } catch (e: any) {
+      Sentry.captureException(e)
+      ctx.chatAction = null
+      throw e
+    }
+  }
+
+  private async promptGen (data: ChatPayload): Promise<{ price: number, chat: ChatConversation[] }> {
+    const { conversation, ctx, model } = data
+    if (!ctx.chat?.id) {
+      throw new Error('internal error')
+    }
+    const msgId = (
+      await ctx.reply('...', { message_thread_id: ctx.message?.message_thread_id })
+    ).message_id
+    ctx.chatAction = 'typing'
+    const response = await this.chatCompletion(conversation, model as LlmsModelsEnum)
+    if (response.completion) {
+      await ctx.api.editMessageText(
+        ctx.chat.id,
+        msgId,
+        response.completion.content as string
+      )
+      conversation.push(response.completion)
+      // const price = getPromptPrice(completion, data);
+      // this.logger.info(
+      //   `streamChatCompletion result = tokens: ${
+      //     price.promptTokens + price.completionTokens
+      //   } | ${model} | price: ${price}¢`
+      // );
+      return {
+        price: 0,
+        chat: conversation
+      }
+    }
+    // ctx.chatAction = null
+    ctx.transient.analytics.actualResponseTime = now()
+    return {
+      price: 0,
+      chat: conversation
+    }
+  }
+
+  async onNotBalanceMessage (ctx: OnMessageContext | OnCallBackQueryData): Promise<void> {
+    const accountId = this.payments.getAccountId(ctx)
+    const account = this.payments.getUserAccount(accountId)
+    const addressBalance = await this.payments.getUserBalance(accountId)
+    const { totalCreditsAmount } = await chatService.getUserCredits(accountId)
+    const balance = addressBalance.plus(totalCreditsAmount)
+    const balanceOne = this.payments.toONE(balance, false).toFixed(2)
+    const balanceMessage = appText.notEnoughBalance
+      .replaceAll('$CREDITS', balanceOne)
+      .replaceAll('$WALLET_ADDRESS', account?.address ?? '')
+    ctx.transient.analytics.sessionState = RequestState.Success
+    await sendMessage(ctx, balanceMessage, { parseMode: 'Markdown' }).catch(async (e) => { await this.onError(ctx, e) })
+    ctx.transient.analytics.actualResponseTime = now()
+  }
+
+  async onError (
+    ctx: OnMessageContext | OnCallBackQueryData,
+    e: any,
+    retryCount: number = MAX_TRIES,
+    msg?: string
+  ): Promise<void> {
+    ctx.transient.analytics.sessionState = RequestState.Error
+    Sentry.setContext('llms', { retryCount, msg })
+    Sentry.captureException(e)
+    ctx.chatAction = null
+    if (retryCount === 0) {
+      // Retry limit reached, log an error or take alternative action
+      this.logger.error(`Retry limit reached for error: ${e}`)
+      return
+    }
+    if (e instanceof GrammyError) {
+      if (e.error_code === 400 && e.description.includes('not enough rights')) {
+        await sendMessage(
+          ctx,
+          'Error: The bot does not have permission to send photos in chat'
+        )
+        ctx.transient.analytics.actualResponseTime = now()
+      } else if (e.error_code === 429) {
+        this.botSuspended = true
+        const retryAfter = e.parameters.retry_after
+          ? e.parameters.retry_after < 60
+            ? 60
+            : e.parameters.retry_after * 2
+          : 60
+        const method = e.method
+        const errorMessage = `On method "${method}" | ${e.error_code} - ${e.description}`
+        this.logger.error(errorMessage)
+        await sendMessage(
+          ctx,
+          `${
+            ctx.from.username ? ctx.from.username : ''
+          } Bot has reached limit, wait ${retryAfter} seconds`
+        ).catch(async (e) => { await this.onError(ctx, e, retryCount - 1) })
+        ctx.transient.analytics.actualResponseTime = now()
+        if (method === 'editMessageText') {
+          ctx.session.llms.chatConversation.pop() // deletes last prompt
+        }
+        await sleep(retryAfter * 1000) // wait retryAfter seconds to enable bot
+        this.botSuspended = false
+      } else {
+        this.logger.error(
+          `On method "${e.method}" | ${e.error_code} - ${e.description}`
+        )
+        ctx.transient.analytics.actualResponseTime = now()
+        await sendMessage(ctx, 'Error handling your request').catch(async (e) => { await this.onError(ctx, e, retryCount - 1) })
+      }
+    } else if (e instanceof OpenAI.APIError) {
+      // 429 RateLimitError
+      // e.status = 400 || e.code = BadRequestError
+      this.logger.error(`OPENAI Error ${e.status}(${e.code}) - ${e.message}`)
+      if (e.code === 'context_length_exceeded') {
+        await sendMessage(ctx, e.message).catch(async (e) => { await this.onError(ctx, e, retryCount - 1) })
+        ctx.transient.analytics.actualResponseTime = now()
+        await this.onStop(ctx)
+      } else {
+        await sendMessage(
+          ctx,
+          'Error accessing OpenAI (ChatGPT). Please try later'
+        ).catch(async (e) => { await this.onError(ctx, e, retryCount - 1) })
+        ctx.transient.analytics.actualResponseTime = now()
+      }
+    } else if (e instanceof AxiosError) {
+      this.logger.error(`${e.message}`)
+      await sendMessage(ctx, 'Error handling your request').catch(async (e) => {
+        await this.onError(ctx, e, retryCount - 1)
+      })
+    } else {
+      this.logger.error(`${e.toString()}`)
+      await sendMessage(ctx, 'Error handling your request')
+        .catch(async (e) => { await this.onError(ctx, e, retryCount - 1) }
+        )
+      ctx.transient.analytics.actualResponseTime = now()
+    }
+  }
+}
diff --git a/src/modules/llms/llmsBot.ts b/src/modules/llms/llmsBot.ts
new file mode 100644
index 00000000..af559040
--- /dev/null
+++ b/src/modules/llms/llmsBot.ts
@@ -0,0 +1,77 @@
+import { type BotPayments } from '../payment'
+import {
+  type OnMessageContext,
+  type OnCallBackQueryData,
+  type ChatConversation
+} from '../types'
+import {
+  isMentioned,
+  SupportedCommands
+} from './helpers'
+import { llmCompletion, type LlmCompletion } from './api/llmApi'
+import { LlmsModelsEnum } from './types'
+import { LlmsBase } from './llmsBase'
+
+export class LlmsBot extends LlmsBase {
+  constructor (payments: BotPayments) {
+    super(payments, 'LlmsBot')
+  }
+
+  public getEstimatedPrice (ctx: any): number {
+    return 0
+  }
+
+  public isSupportedEvent (
+    ctx: OnMessageContext | OnCallBackQueryData
+  ): boolean {
+    const hasCommand = ctx.hasCommand([
+      SupportedCommands.j2Ultra
+    ])
+    if (isMentioned(ctx)) {
+      return true
+    }
+    const chatPrefix = this.hasPrefix(ctx.message?.text ?? '')
+    if (chatPrefix !== '') {
+      return true
+    }
+    return hasCommand
+  }
+
+  async chatStreamCompletion (
+    conversation: ChatConversation[],
+    model: LlmsModelsEnum,
+    ctx: OnMessageContext | OnCallBackQueryData,
+    msgId: number,
+    limitTokens: boolean): Promise<LlmCompletion> {
+    return {
+      completion: undefined,
+      usage: 0,
+      price: 0,
+      inputTokens: 0,
+      outputTokens: 0
+    }
+  }
+
+  async chatCompletion (
+    conversation: ChatConversation[],
+    model: LlmsModelsEnum
+  ): Promise<LlmCompletion> {
+    return await llmCompletion(conversation, model)
+  }
+
+  hasPrefix (prompt: string): string {
+    return ''
+  }
+
+  public async onEvent (ctx: OnMessageContext | OnCallBackQueryData): Promise<void> {
+    ctx.transient.analytics.module = this.module
+    const isSupportedEvent = this.isSupportedEvent(ctx)
+    if (!isSupportedEvent && ctx.chat?.type !== 'private') {
+      this.logger.warn(`### unsupported command ${ctx.message?.text}`)
+      return
+    }
+    if (ctx.hasCommand(SupportedCommands.j2Ultra)) {
+      await this.onChat(ctx, LlmsModelsEnum.J2_ULTRA, false)
+    }
+  }
+}
diff --git a/src/modules/llms/vertexBot.ts b/src/modules/llms/vertexBot.ts
new file mode 100644
index 00000000..225eb7ec
--- /dev/null
+++ b/src/modules/llms/vertexBot.ts
@@ -0,0 +1,87 @@
+import { type BotPayments } from '../payment'
+import {
+  type OnMessageContext,
+  type OnCallBackQueryData,
+  type ChatConversation
+} from '../types'
+import {
+  hasBardPrefix,
+  hasGeminiPrefix,
+  isMentioned,
+  SupportedCommands
+} from './helpers'
+import { type LlmCompletion } from './api/llmApi'
+import { LlmsModelsEnum } from './types'
+
+import { LlmsBase } from './llmsBase'
+import { vertexCompletion, vertexStreamCompletion } from './api/vertex'
+export class VertexBot extends LlmsBase {
+  constructor (payments: BotPayments) {
+    super(payments, 'VertexBot')
+  }
+
+  public getEstimatedPrice (ctx: any): number {
+    return 0
+  }
+
+  public isSupportedEvent (
+    ctx: OnMessageContext | OnCallBackQueryData
+  ): boolean {
+    const hasCommand = ctx.hasCommand([
+      SupportedCommands.bard,
+      SupportedCommands.bardF,
+      SupportedCommands.gemini,
+      SupportedCommands.gShort])
+    if (isMentioned(ctx)) {
+      return true
+    }
+    const chatPrefix = this.hasPrefix(ctx.message?.text ?? '')
+    if (chatPrefix !== '') {
+      return true
+    }
+    return hasCommand
+  }
+
+  async chatStreamCompletion (
+    conversation: ChatConversation[],
+    model: LlmsModelsEnum,
+    ctx: OnMessageContext | OnCallBackQueryData,
+    msgId: number,
+    limitTokens: boolean): Promise<LlmCompletion> {
+    return await vertexStreamCompletion(conversation,
+      model as LlmsModelsEnum,
+      ctx,
+      msgId,
+      true // telegram messages have a character limit
+    )
+  }
+
+  async chatCompletion (
+    conversation: ChatConversation[],
+    model: LlmsModelsEnum
+  ): Promise<LlmCompletion> {
+    return await vertexCompletion(conversation, model)
+  }
+
+  hasPrefix (prompt: string): string {
+    return (
+      hasGeminiPrefix(prompt) || hasBardPrefix(prompt)
+    )
+  }
+
+  public async onEvent (ctx: OnMessageContext | OnCallBackQueryData): Promise<void> {
+    ctx.transient.analytics.module = this.module
+    const isSupportedEvent = this.isSupportedEvent(ctx)
+    if (!isSupportedEvent && ctx.chat?.type !== 'private') {
+      this.logger.warn(`### unsupported command ${ctx.message?.text}`)
+      return
+    }
+    if (ctx.hasCommand([SupportedCommands.bard, SupportedCommands.bardF]) || hasBardPrefix(ctx.message?.text ?? '')) {
+      await this.onChat(ctx, LlmsModelsEnum.BISON, false)
+      return
+    }
+    if (ctx.hasCommand([SupportedCommands.gemini, SupportedCommands.gShort]) || (hasGeminiPrefix(ctx.message?.text ?? '') !== '')) {
+      await this.onChat(ctx, LlmsModelsEnum.GEMINI, true)
+    }
+  }
+}
diff --git a/src/modules/open-ai/index.ts b/src/modules/open-ai/index.ts
index 4833e6b2..fae23367 100644
--- a/src/modules/open-ai/index.ts
+++ b/src/modules/open-ai/index.ts
@@ -677,7 +677,7 @@
     try {
       const prompt = ctx.session.openAi.chatGpt.requestQueue.shift() ?? ''
       const { chatConversation, model } = ctx.session.openAi.chatGpt
-      const minBalance = await getMinBalance(ctx, ChatGPTModelsEnum.GPT_4_32K)
+      const minBalance = await getMinBalance(ctx, ctx.session.openAi.chatGpt.model)
      if (await this.hasBalance(ctx, minBalance)) {
         if (prompt === '') {
           const msg =
@@ -733,7 +733,7 @@
     while (ctx.session.openAi.imageGen.imgRequestQueue.length > 0) {
       try {
         const img = ctx.session.openAi.imageGen.imgRequestQueue.shift()
-        const minBalance = await getMinBalance(ctx, ChatGPTModelsEnum.GPT_4_32K)
+        const minBalance = await getMinBalance(ctx, ctx.session.openAi.chatGpt.model)
         if (await this.hasBalance(ctx, minBalance)) {
           if (img?.command === 'dalle') {
             await this.onGenImgCmd(img?.prompt, ctx)