diff --git a/docs/src/content/docs/getting-started/configuration.mdx b/docs/src/content/docs/getting-started/configuration.mdx index 150ea816cc..63a362de56 100644 --- a/docs/src/content/docs/getting-started/configuration.mdx +++ b/docs/src/content/docs/getting-started/configuration.mdx @@ -1184,6 +1184,10 @@ script({ Follow [this guide](https://huggingface.co/blog/yagilb/lms-hf) to load Hugging Face models into LMStudio. +## Jan + +The `jan` provider connects to the [Jan](https://jan.ai/) local server. + ## LocalAI [LocalAI](https://localai.io/) act as a drop-in replacement REST API that’s compatible @@ -1233,9 +1237,8 @@ that allows you to run an LLM locally. The provider is `llamafile` and the model name is ignored. -## Jan, LLaMA.cpp +## LLaMA.cpp -[Jan](https://jan.ai/), [LLaMA.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) also allow running models locally or interfacing with other LLM vendors. diff --git a/packages/cli/src/nodehost.ts b/packages/cli/src/nodehost.ts index aab1454417..4972b8d337 100644 --- a/packages/cli/src/nodehost.ts +++ b/packages/cli/src/nodehost.ts @@ -96,10 +96,10 @@ class ModelManager implements ModelService { ): Promise { const { trace } = options || {} const { provider, model } = parseModelIdentifier(modelid) - if (provider === MODEL_PROVIDER_OLLAMA) { - if (this.pulled.includes(modelid)) return { ok: true } + if (this.pulled.includes(modelid)) return { ok: true } - if (!isQuiet) logVerbose(`ollama pull ${model}`) + if (provider === MODEL_PROVIDER_OLLAMA) { + logVerbose(`ollama: pull ${model}`) try { const conn = await this.getModelToken(modelid) const res = await fetch(`${conn.base}/api/pull`, { @@ -120,8 +120,8 @@ class ModelManager implements ModelService { if (res.ok) this.pulled.push(modelid) return { ok: res.ok, status: res.status } } catch (e) { - logError(`failed to pull model ${model}`) - trace?.error("pull model failed", e) + logError(`ollama: failed to pull model ${model}`) + trace?.error("ollama: 
pull model failed", e) return { ok: false, status: 500, error: serializeError(e) } } } diff --git a/packages/core/src/connection.ts b/packages/core/src/connection.ts index 447f712f4f..7023d257d4 100644 --- a/packages/core/src/connection.ts +++ b/packages/core/src/connection.ts @@ -32,8 +32,9 @@ import { MISTRAL_API_BASE, MODEL_PROVIDER_LMSTUDIO, LMSTUDIO_API_BASE, + MODEL_PROVIDER_JAN, + JAN_API_BASE, } from "./constants" -import { fileExists, readText, writeText } from "./fs" import { OpenAIAPIType, host, @@ -481,6 +482,19 @@ export async function parseTokenFromEnv( } } + if (provider === MODEL_PROVIDER_JAN) { + const base = findEnvVar(env, "JAN", BASE_SUFFIX)?.value || JAN_API_BASE + if (!URL.canParse(base)) throw new Error(`${base} must be a valid URL`) + return { + provider, + model, + base, + token: "lmstudio", + type: "openai", + source: "env: JAN_API_...", + } + } + if (provider === MODEL_PROVIDER_TRANSFORMERS) { return { provider, diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index 3c02fd3b01..c244d36e8d 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -154,6 +154,7 @@ export const ALIBABA_BASE = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1" export const MISTRAL_API_BASE = "https://api.mistral.ai/v1" export const LMSTUDIO_API_BASE = "http://localhost:1234/v1" +export const JAN_API_BASE = "http://localhost:1337/v1" export const PROMPTFOO_CACHE_PATH = ".genaiscript/cache/tests" export const PROMPTFOO_CONFIG_DIR = ".genaiscript/config/tests" @@ -184,6 +185,7 @@ export const MODEL_PROVIDER_TRANSFORMERS = "transformers" export const MODEL_PROVIDER_ALIBABA = "alibaba" export const MODEL_PROVIDER_MISTRAL = "mistral" export const MODEL_PROVIDER_LMSTUDIO = "lmstudio" +export const MODEL_PROVIDER_JAN = "jan" export const TRACE_FILE_PREVIEW_MAX_LENGTH = 240 @@ -210,6 +212,8 @@ export const DOCS_CONFIGURATION_OLLAMA_URL =
"https://microsoft.github.io/genaiscript/getting-started/configuration/#ollama" export const DOCS_CONFIGURATION_LMSTUDIO_URL = "https://microsoft.github.io/genaiscript/getting-started/configuration/#lmstudio" +export const DOCS_CONFIGURATION_JAN_URL = + "https://microsoft.github.io/genaiscript/getting-started/configuration/#jan" export const DOCS_CONFIGURATION_LLAMAFILE_URL = "https://microsoft.github.io/genaiscript/getting-started/configuration/#llamafile" export const DOCS_CONFIGURATION_LITELLM_URL = @@ -311,6 +315,11 @@ export const MODEL_PROVIDERS: readonly { detail: "LM Studio local server", url: DOCS_CONFIGURATION_LMSTUDIO_URL, }, + { + id: MODEL_PROVIDER_JAN, + detail: "Jan local server", + url: DOCS_CONFIGURATION_JAN_URL, + }, { id: MODEL_PROVIDER_ALIBABA, detail: "Alibaba models", diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts index b8596fbb11..f41da4824a 100644 --- a/packages/core/src/promptdom.ts +++ b/packages/core/src/promptdom.ts @@ -17,22 +17,12 @@ import { YAMLStringify } from "./yaml" import { DEFAULT_FENCE_FORMAT, MARKDOWN_PROMPT_FENCE, - MODEL_PROVIDER_ALIBABA, - MODEL_PROVIDER_ANTHROPIC, - MODEL_PROVIDER_AZURE_OPENAI, - MODEL_PROVIDER_AZURE_SERVERLESS_MODELS, - MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI, - MODEL_PROVIDER_LLAMAFILE, - MODEL_PROVIDER_LMSTUDIO, - MODEL_PROVIDER_OLLAMA, - MODEL_PROVIDER_OPENAI, PROMPT_FENCE, PROMPTY_REGEX, SANITIZED_PROMPT_INJECTION, TEMPLATE_ARG_DATA_SLICE_SAMPLE, TEMPLATE_ARG_FILE_MAX_TOKENS, } from "./constants" -import { parseModelIdentifier } from "./models" import { appendAssistantMessage, appendSystemMessage, diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index 20ced26720..15038e2871 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -150,6 +150,7 @@ type ModelType = OptionsOrString< | "anthropic:claude-2.1" | "anthropic:claude-instant-1.2" |
"huggingface:microsoft/Phi-3-mini-4k-instruct" + | "jan:llama3.2-3b-instruct" | "google:gemini-1.5-flash" | "google:gemini-1.5-flash-latest" | "google:gemini-1.5-flash-8b" diff --git a/packages/vscode/src/lmaccess.ts b/packages/vscode/src/lmaccess.ts index bea3b4af50..8a2fb25c4e 100644 --- a/packages/vscode/src/lmaccess.ts +++ b/packages/vscode/src/lmaccess.ts @@ -1,25 +1,6 @@ /* eslint-disable @typescript-eslint/naming-convention */ import * as vscode from "vscode" import { ExtensionState } from "./state" -import { - MODEL_PROVIDER_OLLAMA, - MODEL_PROVIDER_LLAMAFILE, - MODEL_PROVIDER_AICI, - MODEL_PROVIDER_AZURE_OPENAI, - MODEL_PROVIDER_LITELLM, - MODEL_PROVIDER_OPENAI, - MODEL_PROVIDER_CLIENT, - MODEL_PROVIDER_GITHUB, - TOOL_NAME, - MODEL_PROVIDER_AZURE_SERVERLESS_MODELS, - MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI, - DOCS_CONFIGURATION_URL, - MODEL_PROVIDER_GOOGLE, - MODEL_PROVIDER_ALIBABA, - MODEL_PROVIDER_LMSTUDIO, -} from "../../core/src/constants" -import { OpenAIAPIType } from "../../core/src/host" -import { parseModelIdentifier } from "../../core/src/models" import { ChatCompletionMessageParam } from "../../core/src/chattypes" import { LanguageModelChatRequest } from "../../core/src/server/client" import { ChatStart } from "../../core/src/server/messages" @@ -27,115 +8,6 @@ import { serializeError } from "../../core/src/error" import { logVerbose } from "../../core/src/util" import { renderMessageContent } from "../../core/src/chatrender" -async function generateLanguageModelConfiguration( - state: ExtensionState, - modelId: string -) { - const { provider } = parseModelIdentifier(modelId) - const supportedProviders = [ - MODEL_PROVIDER_OLLAMA, - MODEL_PROVIDER_LLAMAFILE, - MODEL_PROVIDER_AICI, - MODEL_PROVIDER_AZURE_OPENAI, - MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI, - MODEL_PROVIDER_AZURE_SERVERLESS_MODELS, - MODEL_PROVIDER_LITELLM, - MODEL_PROVIDER_LMSTUDIO, - MODEL_PROVIDER_GOOGLE, - MODEL_PROVIDER_ALIBABA, - ] - if (supportedProviders.includes(provider)) { 
- return { provider } - } - - const languageChatModels = await state.languageChatModels() - if (Object.keys(languageChatModels).length) - return { provider: MODEL_PROVIDER_CLIENT, model: "*" } - - const items: (vscode.QuickPickItem & { - model?: string - provider?: string - apiType?: OpenAIAPIType - })[] = [] - if (isLanguageModelsAvailable()) { - const models = await vscode.lm.selectChatModels() - if (models.length) - items.push({ - label: "Visual Studio Language Chat Models", - detail: `Use a registered LLM such as GitHub Copilot.`, - model: "*", - provider: MODEL_PROVIDER_CLIENT, - }) - } - items.push( - { - label: "OpenAI", - detail: `Use a personal OpenAI subscription.`, - provider: MODEL_PROVIDER_OPENAI, - }, - { - label: "Azure OpenAI", - detail: `Use a Azure-hosted OpenAI subscription.`, - provider: MODEL_PROVIDER_AZURE_OPENAI, - apiType: "azure", - }, - { - label: "Azure AI OpenAI (serverless deployment)", - detail: `Use a Azure OpenAI serverless model deployment through Azure AI Studio.`, - provider: MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI, - apiType: "azure_serverless", - }, - { - label: "Azure AI Models (serverless deployment)", - detail: `Use a Azure serverless model deployment through Azure AI Studio.`, - provider: MODEL_PROVIDER_AZURE_SERVERLESS_MODELS, - apiType: "azure_serverless_models", - }, - { - label: "GitHub Models", - detail: `Use a GitHub Models with a GitHub subscription.`, - provider: MODEL_PROVIDER_GITHUB, - }, - { - label: "Alibaba Cloud", - detail: "Use Alibaba Cloud models.", - provider: MODEL_PROVIDER_ALIBABA, - }, - { - label: "LocalAI", - description: "https://localai.io/", - detail: "Use local LLMs instead OpenAI. Requires LocalAI and Docker.", - provider: MODEL_PROVIDER_OPENAI, - apiType: "localai", - }, - { - label: "Ollama", - description: "https://ollama.com/", - detail: "Run a open source LLMs locally. 
Requires Ollama", - provider: MODEL_PROVIDER_OLLAMA, - }, - { - label: "AICI", - description: "http://github.com/microsoft/aici", - detail: "Generate AICI javascript prompts.", - provider: MODEL_PROVIDER_AICI, - } - ) - - const res: { model?: string; provider?: string; apiType?: OpenAIAPIType } = - await vscode.window.showQuickPick< - vscode.QuickPickItem & { - model?: string - provider?: string - apiType?: OpenAIAPIType - } - >(items, { - title: `Configure a Language Model for ${modelId}`, - }) - - return res -} - async function pickChatModel( state: ExtensionState, model: string @@ -165,26 +37,6 @@ async function pickChatModel( return chatModel } -export async function pickLanguageModel( - state: ExtensionState, - modelId: string -) { - const res = await generateLanguageModelConfiguration(state, modelId) - if (res === undefined) return undefined - - if (res.model) return res.model - else { - const configure = "Configure..." - vscode.window.showWarningMessage( - `${TOOL_NAME} - model connection not configured.`, - configure - ) - if (configure) - vscode.env.openExternal(vscode.Uri.parse(DOCS_CONFIGURATION_URL)) - return undefined - } -} - export function isLanguageModelsAvailable() { return ( typeof vscode.lm !== "undefined" &&