diff --git a/apps/web/env.ts b/apps/web/env.ts
index bde9a708c..8b3acb563 100644
--- a/apps/web/env.ts
+++ b/apps/web/env.ts
@@ -49,6 +49,7 @@ export const env = createEnv({
     WEBHOOK_URL: z.string().optional(),
     INTERNAL_API_KEY: z.string().optional(),
     WHITELIST_FROM: z.string().optional(),
+    USE_BACKUP_MODEL: z.coerce.boolean().optional().default(false),
 
     // license
     LICENSE_1_SEAT_VARIANT_ID: z.coerce.number().optional(),
@@ -111,7 +112,10 @@ export const env = createEnv({
     NEXT_PUBLIC_AXIOM_TOKEN: z.string().optional(),
     NEXT_PUBLIC_BEDROCK_SONNET_MODEL: z
       .string()
-      .default("anthropic.claude-3-5-sonnet-20241022-v2:0"),
+      .default("us.anthropic.claude-3-5-sonnet-20241022-v2:0"),
+    NEXT_PUBLIC_BEDROCK_HAIKU_MODEL: z
+      .string()
+      .default("us.anthropic.claude-3-5-haiku-20241022-v1:0"),
     NEXT_PUBLIC_OLLAMA_MODEL: z.string().optional(),
   },
   // For Next.js >= 13.4.4, you only need to destructure client variables:
@@ -181,6 +185,8 @@ export const env = createEnv({
     NEXT_PUBLIC_AXIOM_TOKEN: process.env.NEXT_PUBLIC_AXIOM_TOKEN,
     NEXT_PUBLIC_BEDROCK_SONNET_MODEL:
       process.env.NEXT_PUBLIC_BEDROCK_SONNET_MODEL,
+    NEXT_PUBLIC_BEDROCK_HAIKU_MODEL:
+      process.env.NEXT_PUBLIC_BEDROCK_HAIKU_MODEL,
     NEXT_PUBLIC_OLLAMA_MODEL: process.env.NEXT_PUBLIC_OLLAMA_MODEL,
   },
 });
diff --git a/apps/web/utils/error.ts b/apps/web/utils/error.ts
index 4be59e538..5b3ffe2ce 100644
--- a/apps/web/utils/error.ts
+++ b/apps/web/utils/error.ts
@@ -108,6 +108,10 @@ export function isAWSThrottlingError(error: unknown): error is Error {
   );
 }
 
+export function isServiceUnavailableError(error: unknown): error is Error {
+  return error instanceof Error && error.name === "ServiceUnavailableException";
+}
+
 // we don't want to capture these errors in Sentry
 export function isKnownApiError(error: unknown): boolean {
   return (
diff --git a/apps/web/utils/llms/config.ts b/apps/web/utils/llms/config.ts
index 9ea69566c..a76e0bf67 100644
--- a/apps/web/utils/llms/config.ts
+++ b/apps/web/utils/llms/config.ts
@@ -12,6 +12,7 @@ export const Model = {
   GPT_4O: "gpt-4o",
   GPT_4O_MINI: "gpt-4o-mini",
   CLAUDE_3_5_SONNET_BEDROCK: env.NEXT_PUBLIC_BEDROCK_SONNET_MODEL,
+  CLAUDE_3_5_HAIKU_BEDROCK: env.NEXT_PUBLIC_BEDROCK_HAIKU_MODEL,
   CLAUDE_3_5_SONNET_ANTHROPIC: "claude-3-5-sonnet-20241022",
   ...(supportsOllama ? { OLLAMA: env.NEXT_PUBLIC_OLLAMA_MODEL } : {}),
 };
diff --git a/apps/web/utils/llms/index.ts b/apps/web/utils/llms/index.ts
index 346639d81..18b618123 100644
--- a/apps/web/utils/llms/index.ts
+++ b/apps/web/utils/llms/index.ts
@@ -22,6 +22,7 @@ import {
   isInvalidOpenAIModelError,
   isOpenAIAPIKeyDeactivatedError,
   isOpenAIRetryError,
+  isServiceUnavailableError,
 } from "@/utils/error";
 import { sleep } from "@/utils/sleep";
 
@@ -81,21 +82,29 @@ function getModel({ aiProvider, aiModel, aiApiKey }: UserAIFields) {
   throw new Error("AI provider not supported");
 }
 
-export async function chatCompletionObject<T>({
-  userAi,
-  prompt,
-  system,
-  schema,
-  userEmail,
-  usageLabel,
-}: {
+type ChatCompletionObjectArgs<T> = {
   userAi: UserAIFields;
   prompt: string;
   system?: string;
   schema: z.Schema<T>;
   userEmail: string;
   usageLabel: string;
-}) {
+};
+
+export async function chatCompletionObject<T>(
+  options: ChatCompletionObjectArgs<T>,
+) {
+  return withBackupModel(chatCompletionObjectInternal, options);
+}
+
+async function chatCompletionObjectInternal<T>({
+  userAi,
+  prompt,
+  system,
+  schema,
+  userEmail,
+  usageLabel,
+}: ChatCompletionObjectArgs<T>) {
   try {
     const { provider, model, llmModel } = getModel(userAi);
 
@@ -141,7 +150,7 @@ export async function chatCompletionStream({
 }) {
   const { provider, model, llmModel } = getModel(userAi);
 
-  const result = await streamText({
+  const result = streamText({
     model: llmModel,
     prompt,
     system,
@@ -162,15 +171,7 @@ export async function chatCompletionStream({
   return result;
 }
 
-export async function chatCompletionTools({
-  userAi,
-  prompt,
-  system,
-  tools,
-  maxSteps,
-  label,
-  userEmail,
-}: {
+type ChatCompletionToolsArgs = {
   userAi: UserAIFields;
   prompt: string;
   system?: string;
@@ -178,7 +179,21 @@ export async function chatCompletionTools({
   maxSteps?: number;
   label: string;
   userEmail: string;
-}) {
+};
+
+export async function chatCompletionTools(options: ChatCompletionToolsArgs) {
+  return withBackupModel(chatCompletionToolsInternal, options);
+}
+
+async function chatCompletionToolsInternal({
+  userAi,
+  prompt,
+  system,
+  tools,
+  maxSteps,
+  label,
+  userEmail,
+}: ChatCompletionToolsArgs) {
   try {
     const { provider, model, llmModel } = getModel(userAi);
 
@@ -292,6 +307,28 @@ export async function withRetry<T>(
   throw lastError;
 }
 
+// Helps when service is unavailable / throttled / rate limited
+async function withBackupModel<T, Args extends { userAi: UserAIFields }>(
+  fn: (args: Args) => Promise<T>,
+  args: Args,
+): Promise<T> {
+  try {
+    return await fn(args);
+  } catch (error) {
+    if (env.USE_BACKUP_MODEL && isServiceUnavailableError(error)) {
+      return await fn({
+        ...args,
+        userAi: {
+          aiProvider: Provider.ANTHROPIC,
+          aiModel: env.NEXT_PUBLIC_BEDROCK_HAIKU_MODEL,
+          aiApiKey: args.userAi.aiApiKey,
+        },
+      });
+    }
+    throw error;
+  }
+}
+
 async function handleError(error: unknown, userEmail: string) {
   if (APICallError.isInstance(error)) {
     if (isIncorrectOpenAIAPIKeyError(error)) {
diff --git a/apps/web/utils/usage.ts b/apps/web/utils/usage.ts
index 213544037..9e27215df 100644
--- a/apps/web/utils/usage.ts
+++ b/apps/web/utils/usage.ts
@@ -83,6 +83,14 @@ const costs: Record<
     input: 3 / 1_000_000,
     output: 15 / 1_000_000,
   },
+  "anthropic.claude-3-5-haiku-20241022-v1:0": {
+    input: 0.8 / 1_000_000,
+    output: 4 / 1_000_000,
+  },
+  "us.anthropic.claude-3-5-haiku-20241022-v1:0": {
+    input: 0.8 / 1_000_000,
+    output: 4 / 1_000_000,
+  },
 };
 
 // returns cost in cents