From 8116a110f810a084eab79e57d8459ef377750605 Mon Sep 17 00:00:00 2001
From: ai-dial-actions <149404362+ai-dial-actions@users.noreply.github.com>
Date: Fri, 20 Sep 2024 16:42:29 +0000
Subject: [PATCH] deploy: 360a5dd30ed0751b9c13592129a70ce06b3a9ddf

---
 404.html                                        | 2 +-
 Auth/Web/IDPs/auth0.html                        | 2 +-
 Auth/Web/IDPs/cognito.html                      | 2 +-
 Auth/Web/IDPs/entraID.html                      | 2 +-
 Auth/Web/IDPs/google.html                       | 2 +-
 Auth/Web/IDPs/keycloak.html                     | 2 +-
 Auth/Web/IDPs/okta.html                         | 2 +-
 Auth/Web/overview.html                          | 2 +-
 Auth/overview.html                              | 2 +-
 Auth/programmatic-auth.html                     | 2 +-
 ...ow_to_call_image_to_text_applications.html   | 2 +-
 ...ow_to_call_text_to_image_applications.html   | 2 +-
 ...how_to_call_text_to_text_applications.html   | 2 +-
 Deployment/Bedrock Model Deployment.html        | 2 +-
 Deployment/OpenAI Model Deployment.html         | 2 +-
 Deployment/Vertex Model Deployment.html         | 2 +-
 Deployment/configuration.html                   | 2 +-
 Roles and Access Control/API Keys.html          | 2 +-
 Roles and Access Control/chat-users.html        | 2 +-
 Roles and Access Control/overview.html          | 2 +-
 architecture.html                               | 2 +-
 assets/js/e8322e98.89d02aec.js                  | 1 -
 assets/js/e8322e98.c5f5c85c.js                  | 1 +
 ...n.a10bef4b.js => runtime~main.6a1cd384.js}   | 2 +-
 chat-design.html                                | 2 +-
 index.html                                      | 2 +-
 quick-start.html                                | 2 +-
 search.html                                     | 2 +-
 supported-models.html                           | 2 +-
 tutorials/adapter-dial.html                     | 2 +-
 tutorials/azure-ad-configuration.html           | 2 +-
 tutorials/chat-objects.html                     | 2 +-
 tutorials/data-visualization.html               | 2 +-
 tutorials/enable-publications.html              | 2 +-
 tutorials/high-load-performance.html            | 2 +-
 tutorials/interceptors.html                     | 2 +-
 tutorials/load-balancer.html                    | 2 +-
 tutorials/localization.html                     | 2 +-
 tutorials/multimodality.html                    | 2 +-
 tutorials/quick-start-model.html                | 2 +-
 tutorials/quick-start-with-addon.html           | 2 +-
 tutorials/quick-start-with-application.html     | 2 +-
 .../quick-start-with-self-hosted-model.html     | 20 +++++++++++--------
 tutorials/rate-limits-users.html                | 2 +-
 tutorials/realtime-analytics.html               | 2 +-
 user-guide.html                                 | 2 +-
 .../dial-aws-deployment.html                    | 2 +-
 .../demos-for-developers/dial-continue.html     | 2 +-
 .../demos-for-developers/dial-data-viz.html     | 2 +-
 .../dial-develop-and-deploy.html                | 2 +-
 .../dial-gcp-deployment.html                    | 2 +-
 .../dial-guided-conversation.html               | 2 +-
 .../demos-for-developers/dial-rag-eval.html     | 2 +-
 .../dial-unified-api.html                       | 2 +-
 video demos/demos/animated-scatterplot.html     | 2 +-
 video demos/demos/dial-chathub.html             | 2 +-
 video demos/demos/dial-collaboration.html       | 2 +-
 video demos/demos/dial-excel-plugin.html        | 2 +-
 video demos/demos/dial-omics-assistant.html     | 2 +-
 .../demos/dial-parameterized-replay.html        | 2 +-
 video demos/demos/dial-product-overview.html    | 2 +-
 video demos/demos/dial-rag.html                 | 2 +-
 .../demos/dial-roles-and-rate-limits.html       | 2 +-
 video demos/demos/dial-statgpt.html             | 2 +-
 video demos/demos/dial-ui-basics.html           | 2 +-
 video demos/demos/dial-web-rag.html             | 2 +-
 video demos/demos/interceptors.html             | 2 +-
 video demos/demos/profile-generator.html        | 2 +-
 68 files changed, 78 insertions(+), 74 deletions(-)
 delete mode 100644 assets/js/e8322e98.89d02aec.js
 create mode 100644 assets/js/e8322e98.c5f5c85c.js
 rename assets/js/{runtime~main.a10bef4b.js => runtime~main.6a1cd384.js} (77%)

[Each HTML page has an identical two-line hunk at lines 10-16: a script-tag asset-hash bump with no recoverable content.]
diff --git a/assets/js/e8322e98.89d02aec.js b/assets/js/e8322e98.c5f5c85c.js
[minified Docusaurus page bundle for docs/tutorials/quick-start-with-self-hosted-model.md; the readable content it renders is reconstructed below, with the old/new wording changes kept as -/+ lines]
diff --git a/assets/js/runtime~main.a10bef4b.js b/assets/js/runtime~main.6a1cd384.js
similarity index 77%
[minified webpack runtime; not human-readable]

# Launch AI DIAL Chat with a Self-Hosted Model

## Introduction

In this tutorial, you will learn how to quickly launch AI DIAL Chat with a self-hosted model powered by [Ollama](https://ollama.com/).

## Prerequisites

Docker engine installed on your machine (Docker Compose Version 2.20.0+).

> Refer to the [Docker](https://docs.docker.com/desktop/) documentation.

## Step 1: Get AI DIAL

Clone [the repository](https://github.com/epam/ai-dial/) with the tutorials and change directory to the following folder:

```sh
cd dial-docker-compose/ollama
```

## Step 2: Choose a model to run

Ollama supports a wide range of popular open-source models.

First, consider the modality you are interested in: is it a regular text-to-text chat model, a multi-modal vision model, or an embedding model?

Follow the feature tags (`Embeddings`, `Code`, `Tools`, `Vision`) at [Ollama Search](https://ollama.com/search) to find the appropriate model.

We recommend choosing one of the following models, which have been tested:

### Chat models

| Model | Tools |
|-------|-------|
| [llama3.1:8b-instruct-q4_0](https://ollama.com/library/llama3.1:8b-instruct-q4_0) | ✅ *(only in non-streaming mode)* |
| [mistral:7b-instruct-q4_0](https://ollama.com/library/mistral:7b-instruct-q4_0) | ❌ |
| [phi3.5:3.8b-mini-instruct-q4_0](https://ollama.com/library/phi3.5:3.8b-mini-instruct-q4_0) | ❌ |
| [gemma2:2b-instruct-q4_0](https://ollama.com/library/gemma2:2b-instruct-q4_0) | ❌ |

All the models support streaming.

### Vision models

- [llava:7b-v1.6-mistral-q4_0](https://ollama.com/library/llava:7b-v1.6-mistral-q4_0)
- [llava-phi3:3.8b-mini-q4_0](https://ollama.com/library/llava-phi3:3.8b-mini-q4_0)

### Embedding models

- [nomic-embed-text:137m-v1.5-fp16](https://ollama.com/library/nomic-embed-text:137m-v1.5-fp16)
- [bge-m3:567m-fp16](https://ollama.com/library/bge-m3:567m-fp16)

## Step 3: Launch AI DIAL Chat

1. Configure the `.env` file in the current directory according to the type of model you've chosen:

   - Set `OLLAMA_CHAT_MODEL` to the name of a text model.
   - Set `OLLAMA_VISION_MODEL` to the name of a vision model.
   - Set `OLLAMA_EMBEDDING_MODEL` to the name of an embedding model.

   **Note**: It's not necessary to configure all the models. If a model isn't set, it won't be downloaded. A minimal sample is sketched below.
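   As an illustration only, here is a minimal `.env` sketch. The variable names come from this tutorial; the model names are taken from the tested lists in Step 2, and any subset of the three works:

   ```sh
   # Hypothetical selection: set only the models you want; unset models are not downloaded
   OLLAMA_CHAT_MODEL=llama3.1:8b-instruct-q4_0
   OLLAMA_VISION_MODEL=llava:7b-v1.6-mistral-q4_0
   OLLAMA_EMBEDDING_MODEL=nomic-embed-text:137m-v1.5-fp16
   ```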
2. Then run the following command to pull the specified models and load them into the memory of the Ollama server:
   ```sh
   docker compose up --abort-on-container-exit
   ```
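   While the stack is starting, you can watch the model download progress in the logs of the setup service. The `ollama-setup` service name comes from this tutorial; the command itself is a suggested addition, not part of the original page:

   ```sh
   docker compose logs -f ollama-setup
   ```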
-  > Keep in mind that a typical size of a lightweight Ollama model is around a few gigabytes. So it may take a few minutes (or dozens of minutes) to download them on the first run depending on your Internet bandwidth.
-  > The model is fully loaded once `ollama-setup` service prints `The Ollama server is up and running.`
+  > Keep in mind that a typical size of a lightweight Ollama model is around a few gigabytes. So it may take a few minutes (or more) to download it on the first run, depending on your internet bandwidth and the size of the model you choose.
+  > The models are fully loaded once `ollama-setup` service prints `The Ollama server is up and running.`

3. Finally, open http://localhost:3000/ in your browser to launch the AI DIAL Chat application and select the appropriate AI DIAL deployment to converse with:

   - `Self-hosted chat model` deployment for the `OLLAMA_CHAT_MODEL`
   - `Self-hosted vision model` deployment for the `OLLAMA_VISION_MODEL`

   > Note that the vision models we tested do not support response streaming. Moreover, they are typically more computationally expensive than the chat models, so it may take minutes for a vision model to respond.

   The embedding model will become available in AI DIAL under the deployment name `embedding-model` and can be called via the endpoint: `localhost:8080/openai/deployments/embedding-model/embeddings`.
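   For illustration, such an embeddings call can be made with curl. This sketch assumes the default `dial_api_key` API key used in the quick-start compose files and a standard OpenAI-style request body; substitute your own key and input:

   ```sh
   curl http://localhost:8080/openai/deployments/embedding-model/embeddings \
     -H "Api-Key: dial_api_key" \
     -H "Content-Type: application/json" \
     -d '{"input": ["Hello, DIAL!"]}'
   ```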