Skip to content

Commit

Permalink
🔄 feat: add support for multiple azure inference deployments
Browse files Browse the repository at this point in the history
  • Loading branch information
pelikhan committed Oct 17, 2024
1 parent 96a5fae commit 1b145b2
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 11 deletions.
11 changes: 10 additions & 1 deletion docs/src/content/docs/getting-started/configuration.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,16 @@ GENAISCRIPT_DEFAULT_SMALL_MODEL=azure_serverless:<deploymentid>

:::

Note: better support will come in future versions.
### Support for multiple inference deployments

You can set `AZURE_INFERENCE_CREDENTIAL` to a list of `deploymentid=key` pairs, one per line, to support multiple deployments (where each deployment has a different key).

```txt title=".env"
AZURE_INFERENCE_CREDENTIAL="
model1=key1
model2=key2
model3=key3
"
```

## GitHub Copilot Chat Models <a id="github-copilot" href=""></a>

Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/connection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ export async function parseTokenFromEnv(

if (provider === MODEL_PROVIDER_AZURE_SERVERLESS) {
const tokenVar = "AZURE_INFERENCE_CREDENTIAL"
const token = env[tokenVar]
const token = env[tokenVar]?.trim()
const base = trimTrailingSlash(env.AZURE_INFERENCE_ENDPOINT)
if (!token && !base) return undefined
if (token === PLACEHOLDER_API_KEY)
Expand Down
26 changes: 17 additions & 9 deletions packages/core/src/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,28 @@ import {
} from "./chattypes"
import { resolveTokenEncoder } from "./encoders"
import { toSignal } from "./cancellation"
import { INITryParse } from "./ini"

export function getConfigHeaders(cfg: LanguageModelConfiguration) {
let { token, type } = cfg
if (type === "azure_serverless") {
const keys = INITryParse(token)
if (keys && Object.keys(keys).length > 1) token = keys[cfg.model]
}
const res: Record<string, string> = {
// openai
authorization: /^Bearer /.test(cfg.token)
? cfg.token
: cfg.token &&
(cfg.type === "openai" ||
cfg.type === "localai" ||
cfg.type === "azure_serverless")
? `Bearer ${cfg.token}`
? token
: token &&
(type === "openai" ||
type === "localai" ||
type === "azure_serverless")
? `Bearer ${token}`
: undefined,
// azure
"api-key":
cfg.token && !/^Bearer /.test(cfg.token) && cfg.type === "azure"
? cfg.token
token && !/^Bearer /.test(token) && type === "azure"
? token
: undefined,
"user-agent": TOOL_ID,
}
Expand Down Expand Up @@ -106,7 +112,9 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async (
let postReq: any = r2

// stream_options fails in some cases
if (model === "gpt-4-turbo-v") delete r2.stream_options
if (model === "gpt-4-turbo-v" || /mistral/i.test(model)) {
delete r2.stream_options
}
if (
req.messages.find(
(msg) =>
Expand Down

0 comments on commit 1b145b2

Please sign in to comment.