From 1b145b234a3fb05f0ef6b7b4c9ddee2e2d479501 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Thu, 17 Oct 2024 01:25:18 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=84=20feat:=20add=20support=20for=20mu?= =?UTF-8?q?ltiple=20azure=20inference=20deployments?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../docs/getting-started/configuration.mdx | 11 +++++++- packages/core/src/connection.ts | 2 +- packages/core/src/openai.ts | 26 ++++++++++++------- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/docs/src/content/docs/getting-started/configuration.mdx b/docs/src/content/docs/getting-started/configuration.mdx index 732aeedbe8..16e109571c 100644 --- a/docs/src/content/docs/getting-started/configuration.mdx +++ b/docs/src/content/docs/getting-started/configuration.mdx @@ -426,7 +426,16 @@ GENAISCRIPT_DEFAULT_SMALL_MODEL=azure_serverless: ::: -Note: better support will come in future versions. +### Support for multiple inference deployments + +You can update the `AZURE_INFERENCE_CREDENTIAL` with a list of `deploymentid=key` pairs to support multiple deployments (each deployment has a different key). 
+ +```txt title=".env" +AZURE_INFERENCE_CREDENTIAL=" +model1=key1 +model2=key2 +model3=key3 +" +``` ## GitHub Copilot Chat Models diff --git a/packages/core/src/connection.ts b/packages/core/src/connection.ts index f0530727d6..f5b761d14a 100644 --- a/packages/core/src/connection.ts +++ b/packages/core/src/connection.ts @@ -183,7 +183,7 @@ export async function parseTokenFromEnv( if (provider === MODEL_PROVIDER_AZURE_SERVERLESS) { const tokenVar = "AZURE_INFERENCE_CREDENTIAL" - const token = env[tokenVar] + const token = env[tokenVar]?.trim() const base = trimTrailingSlash(env.AZURE_INFERENCE_ENDPOINT) if (!token && !base) return undefined if (token === PLACEHOLDER_API_KEY) diff --git a/packages/core/src/openai.ts b/packages/core/src/openai.ts index afd6d1c1f6..a50d74b9b0 100644 --- a/packages/core/src/openai.ts +++ b/packages/core/src/openai.ts @@ -23,22 +23,28 @@ import { } from "./chattypes" import { resolveTokenEncoder } from "./encoders" import { toSignal } from "./cancellation" +import { INITryParse } from "./ini" export function getConfigHeaders(cfg: LanguageModelConfiguration) { + let { token, type } = cfg + if (type === "azure_serverless") { + const keys = INITryParse(token) + if (keys && Object.keys(keys).length > 1) token = keys[cfg.model] + } const res: Record<string, string> = { // openai authorization: /^Bearer /.test(cfg.token) - ? cfg.token - : cfg.token && - (cfg.type === "openai" || - cfg.type === "localai" || - cfg.type === "azure_serverless") - ? `Bearer ${cfg.token}` + ? token + : token && + (type === "openai" || + type === "localai" || + type === "azure_serverless") + ? `Bearer ${token}` : undefined, // azure "api-key": - cfg.token && !/^Bearer /.test(cfg.token) && cfg.type === "azure" - ? cfg.token + token && !/^Bearer /.test(token) && type === "azure" + ? 
token : undefined, "user-agent": TOOL_ID, } @@ -106,7 +112,9 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async ( let postReq: any = r2 // stream_options fails in some cases - if (model === "gpt-4-turbo-v") delete r2.stream_options + if (model === "gpt-4-turbo-v" || /mistral/i.test(model)) { + delete r2.stream_options + } if ( req.messages.find( (msg) =>