From 91be87d7974eca6ee2f8ce94b147403fa543e852 Mon Sep 17 00:00:00 2001
From: fegloff
Date: Wed, 2 Oct 2024 13:03:11 -0500
Subject: [PATCH] update models context + refactor models logic to allow
 dynamic commands and prefixes definitions + fix o1 model issues with urls on
 prompt

---
 src/config.ts                              | 13 ++--
 src/modules/llms/api/openai.ts             |  2 +-
 src/modules/llms/claudeBot.ts              | 61 +++------------
 src/modules/llms/llmsBase.ts               | 59 +++++++++++++--
 src/modules/llms/openaiBot.ts              | 58 +++++++-------
 src/modules/llms/utils/llmModelsManager.ts | 61 +++++++++++++--
 src/modules/llms/utils/llmsData.ts         | 88 +++++++---------------
 src/modules/llms/utils/types.ts            | 16 ++++
 src/modules/llms/vertexBot.ts              | 44 +++--------
 9 files changed, 209 insertions(+), 193 deletions(-)

diff --git a/src/config.ts b/src/config.ts
index 9601a2a4..09d61672 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -62,14 +62,15 @@ export default {
       }
     },
     chatGpt: {
-      chatCompletionContext:
-        'You are an AI Bot powered by Harmony. Your strengths are ai api aggregation for chat, image, and voice interactions. Leveraging a suite of sophisticated subagents, you have the capability to perform tasks such as internet browsing and accessing various services. Your responses should be adaptable to the conversation while maintaining brevity, ideally not exceeding 100 words.',
+      chatCompletionContext: 'Reply ideally not exceeding 100 words',
+      // 'You are an AI Bot powered by Harmony. Your strengths are ai api aggregation for chat, image, and voice interactions. Leveraging a suite of sophisticated subagents, you have the capability to perform tasks such as internet browsing and accessing various services. Your responses should be adaptable to the conversation while maintaining brevity, ideally not exceeding 100 words.',
       // 'You are an AI Bot powered dby Harmony. Your strengths are ai api aggregation for chat, image, and voice interactions, and more. You have subagents that helps you with task like browsing the internet, and other services. Respond flexibly, but try to stay within 100 words in all of your responses.',
       webCrawlerContext: 'You will receive a web crawling text. Please get keys concepts, but try to stay within 4000 words in your response.',
-      visionCompletionContext: `You are a concise AI Bot powered by Harmony, capable of providing complete responses within a 100-word limit.
-      For each additional image, extend your response by 30 words. Your responses should be informative and comprehensive,
-      wrapping up all details without leaving them hanging. Use your flexibility to adapt to any topic, and deliver engaging and fulfilling
-      conversations in a succinct manner.`,
+      visionCompletionContext: 'Response within a 100-word limit',
+      // `You are a concise AI Bot powered by Harmony, capable of providing complete responses within a 100-word limit.
+      // For each additional image, extend your response by 30 words. Your responses should be informative and comprehensive,
+      // wrapping up all details without leaving them hanging. Use your flexibility to adapt to any topic, and deliver engaging and fulfilling
+      // conversations in a succinct manner.`,
       maxTokens: parseInt(process.env.OPENAI_MAX_TOKENS ?? '800'), // telegram messages has a char limit
       wordLimit: 30,
       wordCountBetween: 10,
diff --git a/src/modules/llms/api/openai.ts b/src/modules/llms/api/openai.ts
index 3bb1cbe9..085271b2 100644
--- a/src/modules/llms/api/openai.ts
+++ b/src/modules/llms/api/openai.ts
@@ -130,7 +130,7 @@ export const streamChatCompletion = async (
     model,
     messages: messages as ChatCompletionMessageParam[], // OpenAI.Chat.Completions.CreateChatCompletionRequestMessage[],
     stream: true,
-    max_tokens: limitTokens ? config.openAi.chatGpt.maxTokens : undefined,
+    max_completion_tokens: limitTokens ? config.openAi.chatGpt.maxTokens : undefined, // max_tokens:
     temperature: config.openAi.dalle.completions.temperature || 0.8
   })
   let wordCount = 0
diff --git a/src/modules/llms/claudeBot.ts b/src/modules/llms/claudeBot.ts
index ab432978..bfae09ba 100644
--- a/src/modules/llms/claudeBot.ts
+++ b/src/modules/llms/claudeBot.ts
@@ -4,18 +4,17 @@ import {
   type OnCallBackQueryData,
   type ChatConversation
 } from '../types'
-import { hasCommandPrefix, SupportedCommands } from './utils/helpers'
+import { SupportedCommands } from './utils/helpers'
 import { type LlmCompletion } from './api/llmApi'
 import { anthropicCompletion, anthropicStreamCompletion, toolsChatCompletion } from './api/athropic'
 import { LlmsBase } from './llmsBase'
 import { type ModelVersion } from './utils/llmModelsManager'
 
 export class ClaudeBot extends LlmsBase {
-  private readonly opusPrefix: string[]
+  private readonly claudeModels: ModelVersion[]
 
   constructor (payments: BotPayments) {
     super(payments, 'ClaudeBot', 'llms')
-    this.opusPrefix = this.modelManager.getPrefixByModel(this.modelsEnum.CLAUDE_3_OPUS) ?? []
   }
 
   public getEstimatedPrice (ctx: any): number {
@@ -25,18 +24,7 @@ export class ClaudeBot extends LlmsBase {
   public isSupportedEvent (
     ctx: OnMessageContext | OnCallBackQueryData
   ): boolean {
-    const hasCommand = ctx.hasCommand([
-      this.commandsEnum.CLAUDE,
-      this.commandsEnum.OPUS,
-      this.commandsEnum.O,
-      this.commandsEnum.C,
-      this.commandsEnum.CTOOL,
-      this.commandsEnum.STOOL,
-      this.commandsEnum.CLAUDES,
-      this.commandsEnum.SONNET,
-      this.commandsEnum.S,
-      this.commandsEnum.HAIKU,
-      this.commandsEnum.H])
+    const hasCommand = ctx.hasCommand(this.supportedCommands)
 
     if (ctx.hasCommand(SupportedCommands.new) && this.checkModel(ctx)) {
       return true
@@ -48,12 +36,6 @@ export class ClaudeBot extends LlmsBase {
     return hasCommand
   }
 
-  hasPrefix (prompt: string): string {
-    return (
-      hasCommandPrefix(prompt, this.opusPrefix)
-    )
-  }
-
   async chatStreamCompletion (
     conversation: ChatConversation[],
     model: ModelVersion,
@@ -87,16 +69,7 @@ export class ClaudeBot extends LlmsBase {
       this.logger.warn(`### unsupported command ${ctx.message?.text}`)
       return
     }
-    if (ctx.hasCommand([this.commandsEnum.CTOOL])) {
-      this.updateSessionModel(ctx, this.modelsEnum.CLAUDE_3_OPUS)
-      await this.onChat(ctx, this.modelsEnum.CLAUDE_3_OPUS, false, true)
-      return
-    }
-    if (ctx.hasCommand([this.commandsEnum.STOOL])) {
-      this.updateSessionModel(ctx, this.modelsEnum.CLAUDE_35_SONNET)
-      await this.onChat(ctx, this.modelsEnum.CLAUDE_35_SONNET, false, true)
-      return
-    }
+
     if (
       (ctx.hasCommand(SupportedCommands.new) && this.checkModel(ctx))
     ) {
@@ -104,25 +77,15 @@ export class ClaudeBot extends LlmsBase {
       await this.onChat(ctx, this.modelsEnum.CLAUDE_3_OPUS, true, false)
       return
     }
-    if (ctx.hasCommand([
-      this.commandsEnum.CLAUDE,
-      this.commandsEnum.OPUS,
-      this.commandsEnum.O,
-      this.commandsEnum.C]) ||
-      (hasCommandPrefix(ctx.message?.text ?? '', this.opusPrefix) !== '')
-    ) {
-      this.updateSessionModel(ctx, this.modelsEnum.CLAUDE_3_OPUS)
-      await this.onChat(ctx, this.modelsEnum.CLAUDE_3_OPUS, true, false)
-      return
-    }
-    if (ctx.hasCommand([this.commandsEnum.CLAUDES, this.commandsEnum.SONNET, this.commandsEnum.S])) {
-      this.updateSessionModel(ctx, this.modelsEnum.CLAUDE_35_SONNET)
-      await this.onChat(ctx, this.modelsEnum.CLAUDE_35_SONNET, true, false)
+
+    const model = this.getModelFromContext(ctx)
+    if (!model) {
+      this.logger.warn(`### unsupported model for command ${ctx.message?.text}`)
       return
     }
-    if (ctx.hasCommand([this.commandsEnum.HAIKU, this.commandsEnum.H])) {
-      this.updateSessionModel(ctx, this.modelsEnum.CLAUDE_3_HAIKU)
-      await this.onChat(ctx, this.modelsEnum.CLAUDE_3_HAIKU, false, false)
-    }
+    this.updateSessionModel(ctx, model.version)
+
+    const usesTools = ctx.hasCommand([this.commandsEnum.CTOOL, this.commandsEnum.STOOL])
+    await this.onChat(ctx, model.version, usesTools ? false : this.getStreamOption(model.version), usesTools)
   }
 }
diff --git a/src/modules/llms/llmsBase.ts b/src/modules/llms/llmsBase.ts
index a3b81e19..ccadfaba 100644
--- a/src/modules/llms/llmsBase.ts
+++ b/src/modules/llms/llmsBase.ts
@@ -27,7 +27,7 @@ import {
 import { type LlmCompletion, deleteCollection } from './api/llmApi'
 import * as Sentry from '@sentry/node'
 import { now } from '../../utils/perf'
-import { type LLMModel } from './utils/types'
+import { type ChatModel, type LLMModel } from './utils/types'
 import { ErrorHandler } from '../errorhandler'
 import { SubagentBase } from '../subagents/subagentBase'
 import {
@@ -48,7 +48,11 @@ export abstract class LlmsBase implements PayableBot {
   protected commandsEnum = LlmCommandsEnum
   protected subagents: SubagentBase[]
   protected botSuspended: boolean
-  protected supportedModels: LLMModel[] // LlmsModelsEnum[]
+  protected supportedModels: LLMModel[]
+  protected supportedCommands: string[]
+  protected supportedPrefixes: string[]
+  protected botName: string
+
   errorHandler: ErrorHandler
 
   constructor (payments: BotPayments,
@@ -57,6 +61,7 @@ export abstract class LlmsBase implements PayableBot {
     subagents?: SubagentBase[]
   ) {
     this.module = module
+    this.botName = module
     this.logger = pino({
       name: this.module,
       transport: {
@@ -70,9 +75,29 @@ export abstract class LlmsBase implements PayableBot {
     this.payments = payments
     this.subagents = subagents ?? []
     this.errorHandler = new ErrorHandler()
+    this.supportedModels = this.initSupportedModels()
+    this.supportedCommands = this.initSupportedCommands()
+    this.supportedPrefixes = this.initSupportedPrefixes()
+  }
+
+  private initSupportedModels (): LLMModel[] {
+    return this.modelManager.getModelsByBot(this.botName)
+  }
+
+  private initSupportedCommands (): string[] {
+    return this.supportedModels
+      .filter(model => model.botName === this.botName)
+      .flatMap(model => model.commands)
+  }
+
+  private initSupportedPrefixes (): string[] {
+    return this.supportedModels
+      .filter(model => model.botName === this.botName)
+      .flatMap(model => this.modelManager.getPrefixByModel(model.version) ?? [])
   }
 
   public abstract onEvent (ctx: OnMessageContext | OnCallBackQueryData, refundCallback: (reason?: string) => void): Promise<void>
+
   public abstract isSupportedEvent (
     ctx: OnMessageContext | OnCallBackQueryData
   ): boolean
@@ -92,7 +117,29 @@ export abstract class LlmsBase implements PayableBot {
     usesTools: boolean
   ): Promise<LlmCompletion>
 
-  protected abstract hasPrefix (prompt: string): string
+  // protected abstract hasPrefix (prompt: string): string
+  protected hasPrefix (prompt: string): string {
+    return this.supportedPrefixes.find(prefix => prompt.toLocaleLowerCase().startsWith(prefix)) ?? ''
+  }
+
+  protected getStreamOption (model: ModelVersion): boolean {
+    const foundModel = this.supportedModels.find(m => m.version === model) as ChatModel | undefined
+    return foundModel?.stream ?? false
+  }
+
+  protected getModelFromContext (ctx: OnMessageContext | OnCallBackQueryData): LLMModel | undefined {
+    for (const model of this.supportedModels) {
+      if (model.botName !== this.botName) continue
+      if (ctx.hasCommand(model.commands)) {
+        return model
+      }
+      const prefix = this.modelManager.getPrefixByModel(model.version)
+      if (prefix && prefix.some(p => (ctx.message?.text ?? '').startsWith(p))) {
+        return model
+      }
+    }
+    return undefined
+  }
 
   addSubagents (subagents: SubagentBase[]): void {
     this.subagents = subagents
@@ -110,7 +157,7 @@ export abstract class LlmsBase implements PayableBot {
     return !!this.supportedModels.find(model => model.version === ctx.session.currentModel)
   }
 
-  protected async runSubagents (ctx: OnMessageContext | OnCallBackQueryData, msg: ChatConversation): Promise<void> {
+  protected async runSubagents (ctx: OnMessageContext | OnCallBackQueryData, msg: ChatConversation, stream: boolean, usesTools: boolean): Promise<void> {
     const session = this.getSession(ctx)
     await Promise.all(this.subagents.map(async (agent: SubagentBase) =>
       await agent.run(ctx, msg)))
@@ -119,7 +166,7 @@ export abstract class LlmsBase implements PayableBot {
     session.requestQueue.push(msg)
     if (!session.isProcessingQueue) {
       session.isProcessingQueue = true
-      await this.onChatRequestHandler(ctx, true, false).then(() => {
+      await this.onChatRequestHandler(ctx, stream, usesTools).then(() => {
         session.isProcessingQueue = false
       })
     }
@@ -167,7 +214,7 @@ export abstract class LlmsBase implements PayableBot {
           content: prompt as string ?? '', // await preparePrompt(ctx, prompt as string),
           numSubAgents: supportedAgents
         }
-        await this.runSubagents(ctx, msg) // prompt as string)
+        await this.runSubagents(ctx, msg, stream, usesTools) // prompt as string)
       }
       ctx.transient.analytics.actualResponseTime = now()
     } catch (e: any) {
diff --git a/src/modules/llms/openaiBot.ts b/src/modules/llms/openaiBot.ts
index d5a860ad..defe68b9 100644
--- a/src/modules/llms/openaiBot.ts
+++ b/src/modules/llms/openaiBot.ts
@@ -6,7 +6,6 @@ import {
   RequestState
 } from '../types'
 import {
-  hasCommandPrefix,
   hasNewPrefix,
   isMentioned,
   sendMessage,
@@ -30,7 +29,7 @@ export class OpenAIBot extends LlmsBase {
 
   constructor (payments: BotPayments, subagents?: SubagentBase[]) {
     super(payments, 'OpenAIBot', 'chatGpt', subagents)
-    this.gpt4oPrefix = this.modelManager.getPrefixByModel(this.modelsEnum.GPT_4O) ?? []
+    // this.gpt4oPrefix = this.modelManager.getPrefixByModel(this.modelsEnum.GPT_4O) ?? []
     if (!config.openAi.dalle.isEnabled) {
       this.logger.warn('DALL·E 2 Image Bot is disabled in config')
     }
@@ -49,7 +48,7 @@ export class OpenAIBot extends LlmsBase {
   public isSupportedEvent (
     ctx: OnMessageContext | OnCallBackQueryData
   ): boolean {
-    const commands = ['last', ...this.modelManager.getCommandsByProvider('openai')]
+    const commands = ['last', ...this.supportedCommands]
     const hasCommand = ctx.hasCommand(commands)
     if (ctx.hasCommand(SupportedCommands.new) && this.checkModel(ctx)) {
       return true
@@ -90,7 +89,8 @@ export class OpenAIBot extends LlmsBase {
 
   hasPrefix (prompt: string): string {
     return (
-      hasCommandPrefix(prompt, this.gpt4oPrefix) || hasNewPrefix(prompt) // hasDallePrefix(prompt)
+      this.supportedPrefixes.find(prefix => prompt.toLocaleLowerCase().startsWith(prefix)) ??
+      hasNewPrefix(prompt) // hasDallePrefix(prompt)
     )
   }
 
@@ -106,19 +106,9 @@ export class OpenAIBot extends LlmsBase {
       return
     }
 
-    if (
-      ctx.hasCommand([
-        this.commandsEnum.CHAT,
-        this.commandsEnum.ASK,
-        this.commandsEnum.GPT,
-        this.commandsEnum.GPTO
-      ]) ||
-      hasCommandPrefix(ctx.message?.text ?? '', this.gpt4oPrefix) ||
-      isMentioned(ctx) ||
-      ((ctx.message?.text?.startsWith('chat ') ??
+    if ((ctx.message?.text?.startsWith('chat ') ??
       ctx.message?.text?.startsWith('ask ')) &&
-      ctx.chat?.type === 'private')
-    ) {
+      ctx.chat?.type === 'private') {
       this.updateSessionModel(ctx, this.modelsEnum.GPT_4O)
       await this.onChat(ctx, this.modelsEnum.GPT_4O, true, false)
       return
@@ -140,30 +130,36 @@ export class OpenAIBot extends LlmsBase {
       return
     }
 
-    if (ctx.hasCommand(this.commandsEnum.ASK35)) {
-      this.updateSessionModel(ctx, this.modelsEnum.GPT_35_TURBO)
-      await this.onChat(ctx, this.modelsEnum.GPT_35_TURBO, true, false)
-      return
-    }
+    // if (ctx.hasCommand(this.commandsEnum.ASK35)) {
+    //   this.updateSessionModel(ctx, this.modelsEnum.GPT_35_TURBO)
+    //   await this.onChat(ctx, this.modelsEnum.GPT_35_TURBO, true, false)
+    //   return
+    // }
+
+    // if (ctx.hasCommand(this.commandsEnum.GPT4)) {
+    //   this.updateSessionModel(ctx, this.modelsEnum.GPT_4)
+    //   await this.onChat(ctx, this.modelsEnum.GPT_4, true, false)
+    //   return
+    // }
+
+    // if (ctx.hasCommand([this.commandsEnum.O1, this.commandsEnum.ASK1])) {
+    //   this.updateSessionModel(ctx, this.modelsEnum.O1)
+    //   await this.onChat(ctx, this.modelsEnum.O1, false, false)
+    //   return
+    // }
 
-    if (ctx.hasCommand(this.commandsEnum.GPT4)) {
-      this.updateSessionModel(ctx, this.modelsEnum.GPT_4)
-      await this.onChat(ctx, this.modelsEnum.GPT_4, true, false)
+    const model = this.getModelFromContext(ctx)
+    if (model) {
+      this.updateSessionModel(ctx, model.version)
+      await this.onChat(ctx, model.version, this.getStreamOption(model.version), false)
       return
     }
-
     // if (ctx.hasCommand(this.commandsEnum.ASK32)) {
     //   this.updateSessionModel(ctx, this.modelsEnum.GPT_4_32K)
     //   await this.onChat(ctx, this.modelsEnum.GPT_4_32K, true, false)
     //   return
     // }
 
-    if (ctx.hasCommand([this.commandsEnum.O1, this.commandsEnum.ASK1])) {
-      this.updateSessionModel(ctx, this.modelsEnum.O1)
-      await this.onChat(ctx, this.modelsEnum.O1, false, false)
-      return
-    }
-
     if (ctx.hasCommand(SupportedCommands.last)) {
       await this.onLast(ctx)
       return
diff --git a/src/modules/llms/utils/llmModelsManager.ts b/src/modules/llms/utils/llmModelsManager.ts
index 4a03796c..3a27b8a1 100644
--- a/src/modules/llms/utils/llmModelsManager.ts
+++ b/src/modules/llms/utils/llmModelsManager.ts
@@ -3,18 +3,24 @@ import {
   type Provider,
   type LLMData,
   type LLMModel,
-  type ImageModel
+  type ImageModel,
+  type ModelCommandMap,
+  type ModelPrefixMap
 } from './types'
 
 export class LLMModelsManager {
   private readonly models = new Map()
   private readonly modelsEnum: Record
   private readonly commandsEnum: Record
+  private readonly commandMap: ModelCommandMap
+  private readonly prefixMap: ModelPrefixMap
 
   constructor (llmData: LLMData) {
     this.loadModels(llmData)
     this.modelsEnum = this.createModelsEnum()
     this.commandsEnum = this.createCommandsEnum()
+    this.commandMap = this.buildCommandMap()
+    this.prefixMap = this.buildPrefixMap()
   }
 
   private loadModels (data: LLMData): void {
@@ -22,6 +28,43 @@ export class LLMModelsManager {
     Object.values(data.imageModels).forEach(model => { this.addModel(model) })
   }
 
+  private buildCommandMap (): ModelCommandMap {
+    const commandMap: ModelCommandMap = {}
+    this.models.forEach((model, version) => {
+      model.commands.forEach(command => {
+        commandMap[command] = {
+          model: version,
+          useTools: command === 'ctool' || command === 'stool',
+          stream: 'stream' in model ? model.stream : true
+        }
+      })
+    })
+    return commandMap
+  }
+
+  private buildPrefixMap (): ModelPrefixMap {
+    const prefixMap: ModelPrefixMap = {}
+    this.models.forEach((model, version) => {
+      if (model.prefix) {
+        model.prefix.forEach(prefix => {
+          prefixMap[prefix] = {
+            model: version,
+            stream: 'stream' in model ? model.stream : true
+          }
+        })
+      }
+    })
+    return prefixMap
+  }
+
+  getCommandMap (): ModelCommandMap {
+    return this.commandMap
+  }
+
+  getPrefixMap (): ModelPrefixMap {
+    return this.prefixMap
+  }
+
   addModel (model: LLMModel): void {
     this.models.set(model.version, model)
   }
@@ -46,6 +89,16 @@ export class LLMModelsManager {
     return commandsEnum
   }
 
+  getModelByCommand (command: string): LLMModel | undefined {
+    const modelInfo = this.commandMap[command]
+    return modelInfo ? this.getModel(modelInfo.model) : undefined
+  }
+
+  getModelByPrefix (prefix: string): LLMModel | undefined {
+    const modelInfo = this.prefixMap[prefix]
+    return modelInfo ? this.getModel(modelInfo.model) : undefined
+  }
+
   getModel (version: string): LLMModel | undefined {
     return this.models.get(version)
   }
@@ -129,12 +182,6 @@ export class LLMModelsManager {
     return this.models.get(version)?.prefix
   }
 
-  getModelByCommand (command: string): LLMModel | undefined {
-    return Array.from(this.models.values()).find(model =>
-      model.commands.includes(command)
-    )
-  }
-
   generateTelegramOutput (): string {
     let output = ''
     const providers = Array.from(new Set(this.getAllModels().map(model => model.provider)))
diff --git a/src/modules/llms/utils/llmsData.ts b/src/modules/llms/utils/llmsData.ts
index c089ddf3..4d6a8356 100644
--- a/src/modules/llms/utils/llmsData.ts
+++ b/src/modules/llms/utils/llmsData.ts
@@ -2,18 +2,6 @@ import { type LLMData } from './types'
 
 export const llmData: LLMData = {
   chatModels: {
-    // 'chat-bison': {
-    //   provider: 'vertex',
-    //   name: 'chat-bison',
-    //   fullName: 'chat-bison',
-    //   version: 'chat-bison',
-    //   commands: ['bison', 'b'],
-    //   apiSpec: 'https://example.com/chat-bison-api-spec',
-    //   inputPrice: 0.03,
-    //   outputPrice: 0.06,
-    //   maxContextTokens: 8192,
-    //   chargeType: 'CHAR'
-    // },
     'gemini-10': {
       provider: 'vertex',
       name: 'gemini-10',
@@ -26,7 +14,8 @@ export const llmData: LLMData = {
       inputPrice: 0.000125,
       outputPrice: 0.000375,
       maxContextTokens: 30720,
-      chargeType: 'CHAR'
+      chargeType: 'CHAR',
+      stream: true
     },
     'gemini-15': {
       provider: 'vertex',
@@ -39,20 +28,9 @@ export const llmData: LLMData = {
       inputPrice: 0.0025,
       outputPrice: 0.0075,
       maxContextTokens: 1048576,
-      chargeType: 'CHAR'
+      chargeType: 'CHAR',
+      stream: true
     },
-    // 'j2-ultra': {
-    //   provider: 'jurassic',
-    //   name: 'j2_Ultra',
-    //   fullName: 'j2-ultra',
-    //   version: 'j2-ultra',
-    //   commands: ['j2ultra'],
-    //   apiSpec: 'https://example.com/j2-ultra-api-spec',
-    //   inputPrice: 0.06,
-    //   outputPrice: 0.12,
-    //   maxContextTokens: 32000,
-    //   chargeType: 'TOKEN'
-    // },
     'claude-3-opus': {
       provider: 'claude',
       name: 'claude-3-opus',
@@ -65,7 +43,8 @@ export const llmData: LLMData = {
       inputPrice: 0.015,
       outputPrice: 0.075,
       maxContextTokens: 4096,
-      chargeType: 'TOKEN'
+      chargeType: 'TOKEN',
+      stream: true
     },
     'claude-35-sonnet': {
       provider: 'claude',
@@ -74,11 +53,13 @@ export const llmData: LLMData = {
       botName: 'ClaudeBot',
       version: 'claude-3-5-sonnet-20240620',
       commands: ['sonnet', 'claudes', 's', 'stool'],
+      prefix: ['s. '],
       apiSpec: 'https://www.anthropic.com/news/claude-3-5-sonnet',
       inputPrice: 0.003,
       outputPrice: 0.015,
       maxContextTokens: 8192,
-      chargeType: 'TOKEN'
+      chargeType: 'TOKEN',
+      stream: true
     },
     'claude-3-haiku': {
       provider: 'claude',
@@ -87,11 +68,13 @@ export const llmData: LLMData = {
       botName: 'ClaudeBot',
       version: 'claude-3-haiku-20240307',
       commands: ['haiku', 'h'],
+      prefix: ['h. '],
       apiSpec: 'https://www.anthropic.com/news/claude-3-family',
       inputPrice: 0.00025,
       outputPrice: 0.00125,
       maxContextTokens: 4096,
-      chargeType: 'TOKEN'
+      chargeType: 'TOKEN',
+      stream: true
     },
     'gpt-4': {
       provider: 'openai',
@@ -104,20 +87,9 @@ export const llmData: LLMData = {
       inputPrice: 0.03,
       outputPrice: 0.06,
       maxContextTokens: 8192,
-      chargeType: 'TOKEN'
+      chargeType: 'TOKEN',
+      stream: true
     },
-    // 'gpt-4-32k': {
-    //   provider: 'openai',
-    //   name: 'gpt-4-32k',
-    //   fullName: 'GPT-4 32k',
-    //   version: 'gpt-4-32k',
-    //   commands: ['gpt4-32k', 'ask32'],
-    //   apiSpec: 'https://example.com/gpt-4-32k-api-spec',
-    //   inputPrice: 0.06,
-    //   outputPrice: 0.12,
-    //   maxContextTokens: 32000,
-    //   chargeType: 'TOKEN'
-    // },
     'gpt-35-turbo': {
       provider: 'openai',
       name: 'gpt-35-turbo',
@@ -129,31 +101,23 @@ export const llmData: LLMData = {
       inputPrice: 0.0015,
       outputPrice: 0.002,
       maxContextTokens: 4000,
-      chargeType: 'TOKEN'
+      chargeType: 'TOKEN',
+      stream: true
     },
-    // 'gpt-35-turbo-16k': {
-    //   provider: 'openai',
-    //   name: 'GPT-3.5 Turbo 16k',
-    //   version: 'gpt-3.5-turbo-16k',
-    //   commands: ['gpt35-16k'],
-    //   apiSpec: 'https://example.com/gpt-3.5-turbo-16k-api-spec',
-    //   inputPrice: 0.003,
-    //   outputPrice: 0.004,
-    //   maxContextTokens: 16000,
-    //   chargeType: 'TOKEN'
-    // },
     'gpt-4-vision': {
       provider: 'openai',
       name: 'gpt-4-vision',
       fullName: 'GPT-4 Vision',
       botName: 'OpenAIBot',
       version: 'gpt-4-vision-preview',
-      commands: ['vision'],
+      commands: ['vision', 'v'],
+      prefix: ['v. '],
       apiSpec: 'https://platform.openai.com/docs/guides/vision',
       inputPrice: 0.03,
       outputPrice: 0.06,
       maxContextTokens: 16000,
-      chargeType: 'TOKEN'
+      chargeType: 'TOKEN',
+      stream: true
     },
     'gpt-4o': {
       provider: 'openai',
@@ -161,13 +125,14 @@ export const llmData: LLMData = {
       fullName: 'GPT-4o',
       botName: 'OpenAIBot',
       version: 'gpt-4o',
-      commands: ['gpto', 'ask', 'chat', 'gpt'],
+      commands: ['gpto', 'ask', 'chat', 'gpt', 'a'],
       prefix: ['a. ', '. '],
       apiSpec: 'https://platform.openai.com/docs/models/gpt-4o',
       inputPrice: 0.005,
       outputPrice: 0.0015,
       maxContextTokens: 128000,
-      chargeType: 'TOKEN'
+      chargeType: 'TOKEN',
+      stream: true
     },
     o1: {
       provider: 'openai',
@@ -176,11 +141,13 @@ export const llmData: LLMData = {
       botName: 'OpenAIBot',
       version: 'o1-preview',
       commands: ['o1', 'ask1'],
+      prefix: ['o1. '],
       apiSpec: 'https://platform.openai.com/docs/models/o1',
       inputPrice: 0.015,
       outputPrice: 0.06,
       maxContextTokens: 128000,
-      chargeType: 'TOKEN'
+      chargeType: 'TOKEN',
+      stream: false
     },
     'o1-mini': {
       provider: 'openai',
@@ -193,7 +160,8 @@ export const llmData: LLMData = {
       inputPrice: 0.003,
       outputPrice: 0.012,
       maxContextTokens: 128000,
-      chargeType: 'TOKEN'
+      chargeType: 'TOKEN',
+      stream: false
     }
   },
   imageModels: {
diff --git a/src/modules/llms/utils/types.ts b/src/modules/llms/utils/types.ts
index 36a59e07..81e31d08 100644
--- a/src/modules/llms/utils/types.ts
+++ b/src/modules/llms/utils/types.ts
@@ -20,6 +20,7 @@ export interface ChatModel extends BaseModel {
   outputPrice: number
   maxContextTokens: number
   chargeType: ChargeType
+  stream: boolean
 }
 
 export interface ImageModel extends BaseModel {
@@ -33,3 +34,18 @@ export interface LLMData {
   chatModels: Record<string, ChatModel>
   imageModels: Record<string, ImageModel>
 }
+
+interface ModelCommandConfig {
+  model: string
+  useTools: boolean
+  stream: boolean
+}
+
+export interface ModelCommandMap extends Record<string, ModelCommandConfig> {}
+
+interface ModelPrefixConfig {
+  model: string
+  stream: boolean
+}
+
+export interface ModelPrefixMap extends Record<string, ModelPrefixConfig> {}
diff --git a/src/modules/llms/vertexBot.ts b/src/modules/llms/vertexBot.ts
index 0c8c375d..384b79f4 100644
--- a/src/modules/llms/vertexBot.ts
+++ b/src/modules/llms/vertexBot.ts
@@ -5,7 +5,6 @@ import {
   type ChatConversation
 } from '../types'
 import {
-  hasCommandPrefix,
   isMentioned,
   SupportedCommands
 } from './utils/helpers'
@@ -14,16 +13,11 @@ import { type LlmCompletion } from './api/llmApi'
 import { LlmsBase } from './llmsBase'
 import { vertexCompletion, vertexStreamCompletion } from './api/vertex'
 import { type SubagentBase } from '../subagents'
-import {
-  LlmModelsEnum,
-  type ModelVersion
-} from './utils/llmModelsManager'
+import { type ModelVersion } from './utils/llmModelsManager'
 
 export class VertexBot extends LlmsBase {
-  private readonly geminiPrefix: string[]
   constructor (payments: BotPayments, subagents?: SubagentBase[]) {
     super(payments, 'VertexBot', 'llms', subagents)
-    this.geminiPrefix = this.modelManager.getPrefixByModel(LlmModelsEnum.GEMINI_10) ?? []
   }
 
   public getEstimatedPrice (ctx: any): number {
@@ -33,11 +27,7 @@ export class VertexBot extends LlmsBase {
   public isSupportedEvent (
     ctx: OnMessageContext | OnCallBackQueryData
   ): boolean {
-    const hasCommand = ctx.hasCommand([
-      this.commandsEnum.GEMINI,
-      this.commandsEnum.G,
-      this.commandsEnum.G15,
-      this.commandsEnum.GEMINI15])
+    const hasCommand = ctx.hasCommand(this.supportedCommands)
     if (isMentioned(ctx)) {
       return true
     }
@@ -69,12 +59,6 @@ export class VertexBot extends LlmsBase {
     return await vertexCompletion(conversation, model)
   }
 
-  hasPrefix (prompt: string): string {
-    return (
-      hasCommandPrefix(prompt, this.geminiPrefix)
-    )
-  }
-
   public async onEvent (ctx: OnMessageContext | OnCallBackQueryData): Promise<void> {
     ctx.transient.analytics.module = this.module
     const isSupportedEvent = this.isSupportedEvent(ctx)
@@ -82,24 +66,18 @@ export class VertexBot extends LlmsBase {
       this.logger.warn(`### unsupported command ${ctx.message?.text}`)
       return
     }
 
-    // if (ctx.hasCommand([SupportedCommands.bard, SupportedCommands.bardF]) || hasBardPrefix(ctx.message?.text ?? '')) {
-    //   this.updateSessionModel(ctx, LlmsModelsEnum.BISON)
-    //   await this.onChat(ctx, LlmsModelsEnum.BISON, false, false)
-    //   return
-    // }
-    if (ctx.hasCommand([this.commandsEnum.GEMINI, this.commandsEnum.G]) || (hasCommandPrefix(ctx.message?.text ?? '', this.geminiPrefix))) {
-      this.updateSessionModel(ctx, LlmModelsEnum.GEMINI_10)
-      await this.onChat(ctx, LlmModelsEnum.GEMINI_10, true, false)
-      return
-    }
-    if (ctx.hasCommand([this.commandsEnum.GEMINI15, this.commandsEnum.G15])) {
-      this.updateSessionModel(ctx, LlmModelsEnum.GEMINI_15)
-      await this.onChat(ctx, LlmModelsEnum.GEMINI_15, true, false)
-      // return
-    }
     if (ctx.hasCommand([SupportedCommands.pdf, SupportedCommands.ctx]) && this.checkModel(ctx)) {
       await this.onChat(ctx, ctx.session.currentModel, true, false)
     }
+
+    const model = this.getModelFromContext(ctx)
+    if (!model) {
+      this.logger.warn(`### unsupported model for command ${ctx.message?.text}`)
+      return
+    }
+    this.updateSessionModel(ctx, model.version)
+
+    await this.onChat(ctx, model.version, this.getStreamOption(model.version), false)
   }
 }
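
Reviewer note, not part of the commit: a minimal sketch of how the data-driven wiring above is meant to be used. It assumes the ChatModel shape from src/modules/llms/utils/types.ts; the gpt-4o-mini entry, its prices, command and prefix are hypothetical and only illustrate that LlmsBase now derives supportedCommands, supportedPrefixes and the stream flag from llmsData instead of per-bot if/else blocks.

    import { type ChatModel } from './types' // same import style as llmsData.ts

    // Hypothetical entry for src/modules/llms/utils/llmsData.ts. Declaring it is the only
    // step needed: LlmsBase.initSupportedCommands()/initSupportedPrefixes() collect the
    // commands and prefixes for OpenAIBot, getModelFromContext() resolves /mini commands
    // and "m. ..." messages to this model, and getStreamOption() reads the stream flag
    // when dispatching onChat().
    const gpt4oMini: ChatModel = {
      provider: 'openai',
      name: 'gpt-4o-mini',
      fullName: 'GPT-4o mini',
      botName: 'OpenAIBot',
      version: 'gpt-4o-mini',
      commands: ['mini'],
      prefix: ['m. '],
      apiSpec: 'https://platform.openai.com/docs/models/gpt-4o-mini',
      inputPrice: 0.00015,
      outputPrice: 0.0006,
      maxContextTokens: 128000,
      chargeType: 'TOKEN',
      stream: true
    }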