feat: added quickstart tutorial for self-hosted models (#191)

* feat: added docker-compose and test for llama3:8b via ollama
* feat: added quick-start tutorial for self-hosted models
* chore: formatting fix
* fix: fixed ci test for ollama model
* chore: bumped the default Ollama model to Llama 3.1
* chore: fixed doc
* feat: added instructions for self-hosted vision models
* fix: updated env for ollama ci test
* chore: divided a single quickstart ci job into separate jobs
* chore: simplified names of the ci jobs
* fix: migrated setup.py script for ollama to httpx
* feat: added ci test for self-hosted embedding model
* feat: used .env file instead of env vars in self-hosted model tutorial
* fix: increased timeout in the ollama setup script
* review
* feat: added progress bar for model downloading

---------

Co-authored-by: sr-remsha <sr.remsha@gmail.com>
epam · Sep 19, 2024 · 4460529 · 4460529
1 parent 92286ed
commit 4460529
Show file tree

Hide file tree

Showing 25 changed files with 561 additions and 27 deletions.
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
@@ -7,7 +7,7 @@ on:
 
 jobs:
   run-notebooks:
-    name: Run example Python notebooks
+    name: Cookbook notebooks
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -18,8 +18,8 @@ jobs:
           cwd: "./dial-cookbook/ci"
           up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"
 
-  run-quickstart:
-    name: Run quickstart examples
+  run-quickstart-model:
+    name: Quickstart model
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -28,19 +28,44 @@ jobs:
         with:
           cwd: "./dial-docker-compose/ci/model"
           up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"
+
+  run-quickstart-application:
+    name: Quickstart application
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
       - uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
-        name: Run quickstart application example
         with:
           cwd: "./dial-docker-compose/ci/application"
           up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"
+
+  run-quickstart-addon:
+    name: Quickstart addon
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
       - uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
-        name: Run quickstart addon example
         with:
           cwd: "./dial-docker-compose/ci/addon"
           up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"
 
+  run-quickstart-self-hosted-model:
+    name: Quickstart self-hosted model
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      - uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
+        with:
+          cwd: "./dial-docker-compose/ci/ollama"
+          up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"
+
   build:
-    needs: [run-notebooks, run-quickstart]
+    needs:
+      - run-notebooks
+      - run-quickstart-model
+      - run-quickstart-application
+      - run-quickstart-addon
+      - run-quickstart-self-hosted-model
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -7,7 +7,7 @@ on:
 
 jobs:
   run-notebooks:
-    name: Run example Python notebooks
+    name: Cookbook notebooks
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -18,8 +18,9 @@ jobs:
           cwd: "./dial-cookbook/ci"
           up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"
 
-  run-quickstart:
-    name: Run quickstart examples
+
+  run-quickstart-model:
+    name: Quickstart model
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -28,19 +29,44 @@ jobs:
         with:
           cwd: "./dial-docker-compose/ci/model"
           up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"
+
+  run-quickstart-application:
+    name: Quickstart application
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
       - uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
-        name: Run quickstart application example
         with:
           cwd: "./dial-docker-compose/ci/application"
           up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"
+
+  run-quickstart-addon:
+    name: Quickstart addon
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
       - uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
-        name: Run quickstart addon example
         with:
           cwd: "./dial-docker-compose/ci/addon"
           up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"
 
+  run-quickstart-self-hosted-model:
+    name: Quickstart self-hosted model
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      - uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
+        with:
+          cwd: "./dial-docker-compose/ci/ollama"
+          up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"
+
   build-and-deploy:
-    needs: [run-notebooks, run-quickstart]
+    needs:
+      - run-notebooks
+      - run-quickstart-model
+      - run-quickstart-application
+      - run-quickstart-addon
+      - run-quickstart-self-hosted-model
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

diff --git a/.gitignore b/.gitignore
@@ -23,8 +23,11 @@ yarn-error.log*
 .venv
 __pycache__
 
-# DIAL Core logs
-*.log
+# Docker container volumes
+core-data
+core-logs
+.ollama
+
 /.quarto/
 
 # Autogenerated files by Quarto

diff --git a/README.md b/README.md
@@ -13,10 +13,11 @@
 ## Helm Deployment
 
 * [AI DIAL Generic Installation Simple Guide](https://github.com/epam/ai-dial-helm/tree/main/charts/dial/examples/generic/simple)
-  
+
 ## Tutorials
 
 * [Launch AI DIAL Chat with an Azure model](./docs/tutorials/quick-start-model.md)
+* [Launch AI DIAL Chat with a self-hosted model](./docs/tutorials/quick-start-with-self-hosted-model.md)
 * [Launch AI DIAL Chat with a Sample Application](./docs/tutorials/quick-start-with-application.md)
 * [Launch AI DIAL Chat with a Sample Addon](./docs/tutorials/quick-start-with-addon.md)
 
@@ -31,7 +32,7 @@
 ## Configuration
 
 * Refer to [Configuration](./docs/Deployment/configuration.md)
-  
+
 ## Other AI DIAL Project Open Source Repositories
 
 Here is the current list of repositories where you can find more details. You can also refer to [repository map](https://epam-rail.com/open-source).

diff --git a/dial-docker-compose/addon/docker-compose.yml b/dial-docker-compose/addon/docker-compose.yml
@@ -4,7 +4,7 @@ include:
 
 services:
   adapter-openai:
-    image: epam/ai-dial-adapter-openai:0.11.0
+    image: epam/ai-dial-adapter-openai:0.14.0
     environment:
       WEB_CONCURRENCY: "3"
 

diff --git a/dial-docker-compose/ci/ollama/.env b/dial-docker-compose/ci/ollama/.env
@@ -0,0 +1,4 @@
+DIAL_DIR="./ollama"
+OLLAMA_CHAT_MODEL=llama3.1:8b-instruct-q4_0
+OLLAMA_VISION_MODEL=llava-phi3:3.8b-mini-q4_0
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text:137m-v1.5-fp16
diff --git a/dial-docker-compose/ci/ollama/docker-compose.yml b/dial-docker-compose/ci/ollama/docker-compose.yml
@@ -0,0 +1,16 @@
+# CI compose file: pulls in the services defined in ../../ollama/docker-compose.yml
+# (with variables taken from ./.env) and adds a `test` container on top.
+include:
+  - path: ../../ollama/docker-compose.yml
+    env_file: ./.env
+
+services:
+  # Test runner image is built from the ./test directory.
+  test:
+    build: test
+    # Settings read by the test script via environment variables.
+    environment:
+      DIAL_URL: "http://core:8080"
+      DIAL_API_KEY: "dial_api_key"
+      DIAL_API_VERSION: "2024-02-01"
+    # Start the tests only once both services report healthy.
+    depends_on:
+      ollama-setup:
+        condition: service_healthy
+      core:
+        condition: service_healthy
diff --git a/dial-docker-compose/ci/ollama/test/.dockerignore b/dial-docker-compose/ci/ollama/test/.dockerignore
@@ -0,0 +1 @@
+Dockerfile
diff --git a/dial-docker-compose/ci/ollama/test/Dockerfile b/dial-docker-compose/ci/ollama/test/Dockerfile
@@ -0,0 +1,7 @@
+# Image for the self-hosted model CI test: installs the Python dependencies
+# and runs app.py.
+FROM python:3.11-alpine
+
+WORKDIR /app
+# The wildcard expands to several sources, so the destination must be a
+# directory path that ends with a slash.
+COPY * /app/
+# --no-cache-dir keeps pip's download cache out of the image layer.
+RUN pip install --no-cache-dir -r requirements.txt
+
+CMD ["python", "app.py"]
diff --git a/dial-docker-compose/ci/ollama/test/app.py b/dial-docker-compose/ci/ollama/test/app.py
@@ -0,0 +1,141 @@
+import base64
+import os
+from pathlib import Path
+from typing import Any
+import aiohttp
+import asyncio
+import backoff
+
+import logging
+import time
+from contextlib import asynccontextmanager
+
+
+def get_env(name: str) -> str:
+    value = os.environ.get(name)
+    if value is None:
+        raise ValueError(f"'{name}' environment variable must be defined")
+    return value
+
+
+# Mandatory settings, read once at import time; get_env() raises ValueError
+# if any of them is missing.
+DIAL_URL = get_env("DIAL_URL")
+DIAL_API_KEY = get_env("DIAL_API_KEY")
+DIAL_API_VERSION = get_env("DIAL_API_VERSION")
+
+# DEBUG level so that the logged responses and timings are actually emitted.
+logging.basicConfig(level=logging.DEBUG)
+log = logging.getLogger(__name__)
+
+
+@asynccontextmanager
+async def timer(name: str):
+    log.debug(f"[{name}] Starting...")
+    start = time.perf_counter()
+    yield
+    elapsed = time.perf_counter() - start
+    log.debug(f"[{name}] Executed in {elapsed:.2f} seconds")
+
+
+@backoff.on_exception(
+    backoff.expo,
+    (aiohttp.ClientError, aiohttp.ServerTimeoutError),
+    max_time=60,
+)
+async def post_with_retry(url: str, payload: dict, headers: dict, params: dict):
+    async with aiohttp.ClientSession() as session:
+        async with session.post(
+            url, json=payload, headers=headers, params=params
+        ) as response:
+            response.raise_for_status()
+            return await response.json()
+
+
+def read_image_base64(png_file: Path) -> str:
+    return base64.b64encode(png_file.read_bytes()).decode("utf-8")
+
+async def dial_chat_completion(deployment_id: str, messages: list) -> str:
+    api_url = f"{DIAL_URL}/openai/deployments/{deployment_id}/chat/completions"
+
+    payload = {
+        "model": deployment_id,
+        "messages": messages,
+        "stream": False,
+    }
+    headers = {"api-key": DIAL_API_KEY}
+    params = {"api-version": DIAL_API_VERSION}
+
+    body = await post_with_retry(api_url, payload, headers, params)
+    log.debug(f"Response: {body}")
+
+    content = body.get("choices", [])[0].get("message", {}).get("content", "")
+
+    log.debug(f"Content: {content}")
+
+    return content
+
+async def dial_embeddings(deployment_id: str, input: Any) -> str:
+    api_url = f"{DIAL_URL}/openai/deployments/{deployment_id}/embeddings"
+
+    payload = {
+        "model": deployment_id,
+        "input": input,
+    }
+    headers = {"api-key": DIAL_API_KEY}
+    params = {"api-version": DIAL_API_VERSION}
+
+    body = await post_with_retry(api_url, payload, headers, params)
+    log.debug(f"Response: {body}")
+
+    embedding = body.get("data", [])[0].get("embedding", [])
+
+    log.debug(f"Len embedding vector: {len(embedding)}")
+
+    return embedding
+
+async def test_chat_model(deployment_id: str):
+    message = "2 + 3 = ? Reply with a single number:"
+    messages = [{"role": "user", "content": message}]
+    content = await dial_chat_completion(deployment_id, messages)
+
+    if "5" not in content:
+        raise ValueError(f"Test failed for {deployment_id!r}")
+
+
+async def test_vision_model(deployment_id: str):
+    base64_data = read_image_base64(Path("./image.png"))
+    base64_image = f"data:image/png;base64,{base64_data}"
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Describe the image"},
+                {"type": "image_url", "image_url": {"url": base64_image}},
+            ],
+        }
+    ]
+
+    content = await dial_chat_completion(deployment_id, messages)
+
+    if "vision" not in content.lower():
+        raise ValueError(f"Test failed for {deployment_id!r}")
+
+async def test_embedding_model(deployment_id: str):
+    embeddings = await dial_embeddings(deployment_id, "cat")
+
+    if len(embeddings) == 0 or not isinstance(embeddings[0], float):
+        raise ValueError(f"Test failed for {deployment_id!r}")
+
+
+async def tests():
+    async with timer("Testing chat-model"):
+        await test_chat_model("chat-model")
+
+    async with timer("Testing vision-model"):
+        await test_vision_model("vision-model")
+
+    async with timer("Testing embedding-model"):
+        await test_embedding_model("embedding-model")
+
+if __name__ == "__main__":
+    loop = asyncio.get_event_loop()
+    loop.run_until_complete(tests())
diff --git a/dial-docker-compose/ci/ollama/test/image.png b/dial-docker-compose/ci/ollama/test/image.png
diff --git a/dial-docker-compose/ci/ollama/test/requirements.txt b/dial-docker-compose/ci/ollama/test/requirements.txt
@@ -0,0 +1,2 @@
+aiohttp==3.9.4
+backoff==2.2.1
diff --git a/dial-docker-compose/common.yml b/dial-docker-compose/common.yml
@@ -1,13 +1,13 @@
 services:
   themes:
-    image: epam/ai-dial-chat-themes:0.4.0
+    image: epam/ai-dial-chat-themes:0.6.0
     ports:
       - "3001:8080"
 
   chat:
     ports:
       - "3000:3000"
-    image: epam/ai-dial-chat:0.10.0
+    image: epam/ai-dial-chat:0.17.0
     depends_on:
       - themes
       - core
@@ -36,7 +36,7 @@ services:
     user: ${UID:-root}
     ports:
       - "8080:8080"
-    image: epam/ai-dial-core:0.9.0
+    image: epam/ai-dial-core:0.16.0
     environment:
       'AIDIAL_SETTINGS': '/opt/settings/settings.json'
       'JAVA_OPTS': '-Dgflog.config=/opt/settings/gflog.xml'

diff --git a/dial-docker-compose/model/docker-compose.yml b/dial-docker-compose/model/docker-compose.yml
@@ -4,6 +4,6 @@ include:
 
 services:
   adapter-openai:
-    image: epam/ai-dial-adapter-openai:0.11.0
+    image: epam/ai-dial-adapter-openai:0.14.0
     environment:
       WEB_CONCURRENCY: "3"