Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: added quickstart tutorial for self-hosted models #191

Merged
merged 19 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 31 additions & 6 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:

jobs:
run-notebooks:
name: Run example Python notebooks
name: Cookbook notebooks
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
Expand All @@ -18,8 +18,8 @@ jobs:
cwd: "./dial-cookbook/ci"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

run-quickstart:
name: Run quickstart examples
run-quickstart-model:
name: Quickstart model
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
Expand All @@ -28,19 +28,44 @@ jobs:
with:
cwd: "./dial-docker-compose/ci/model"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

run-quickstart-application:
  # Label shown for this job in the checks list.
  name: Quickstart application
  runs-on: ubuntu-latest
  steps:
    # Fetch the repository so the compose project below is on disk.
    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11  # v4.1.1
    # Run the compose project in `cwd`. The up-flags stop every container as
    # soon as one exits and take the exit code from the `test` service.
    - name: Run quickstart application example
      uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af  # v2.0.0
      with:
        cwd: './dial-docker-compose/ci/application'
        up-flags: '--abort-on-container-exit --exit-code-from test --timeout 300'

run-quickstart-addon:
  # Label shown for this job in the checks list.
  name: Quickstart addon
  runs-on: ubuntu-latest
  steps:
    # Fetch the repository so the compose project below is on disk.
    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11  # v4.1.1
    # Run the compose project in `cwd`. The up-flags stop every container as
    # soon as one exits and take the exit code from the `test` service.
    - name: Run quickstart addon example
      uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af  # v2.0.0
      with:
        cwd: './dial-docker-compose/ci/addon'
        up-flags: '--abort-on-container-exit --exit-code-from test --timeout 300'

run-quickstart-self-hosted-model:
  # Label shown for this job in the checks list.
  name: Quickstart self-hosted model
  runs-on: ubuntu-latest
  steps:
    # Fetch the repository so the compose project below is on disk.
    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
    # Run the compose project in `cwd`. The up-flags stop every container as
    # soon as one exits and take the exit code from the `test` service.
    - uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
      # Explicit step name, matching the application and addon quickstart jobs.
      name: Run quickstart self-hosted model example
      with:
        cwd: "./dial-docker-compose/ci/ollama"
        up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

build:
needs: [run-notebooks, run-quickstart]
needs:
- run-notebooks
- run-quickstart-model
- run-quickstart-application
- run-quickstart-addon
- run-quickstart-self-hosted-model
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
Expand Down
38 changes: 32 additions & 6 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:

jobs:
run-notebooks:
name: Run example Python notebooks
name: Cookbook notebooks
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
Expand All @@ -18,8 +18,9 @@ jobs:
cwd: "./dial-cookbook/ci"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

run-quickstart:
name: Run quickstart examples

run-quickstart-model:
name: Quickstart model
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
Expand All @@ -28,19 +29,44 @@ jobs:
with:
cwd: "./dial-docker-compose/ci/model"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

run-quickstart-application:
  # Label shown for this job in the checks list.
  name: Quickstart application
  runs-on: ubuntu-latest
  steps:
    # Fetch the repository so the compose project below is on disk.
    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11  # v4.1.1
    # Run the compose project in `cwd`. The up-flags stop every container as
    # soon as one exits and take the exit code from the `test` service.
    - name: Run quickstart application example
      uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af  # v2.0.0
      with:
        cwd: './dial-docker-compose/ci/application'
        up-flags: '--abort-on-container-exit --exit-code-from test --timeout 300'

run-quickstart-addon:
  # Label shown for this job in the checks list.
  name: Quickstart addon
  runs-on: ubuntu-latest
  steps:
    # Fetch the repository so the compose project below is on disk.
    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11  # v4.1.1
    # Run the compose project in `cwd`. The up-flags stop every container as
    # soon as one exits and take the exit code from the `test` service.
    - name: Run quickstart addon example
      uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af  # v2.0.0
      with:
        cwd: './dial-docker-compose/ci/addon'
        up-flags: '--abort-on-container-exit --exit-code-from test --timeout 300'

run-quickstart-self-hosted-model:
  # Label shown for this job in the checks list.
  name: Quickstart self-hosted model
  runs-on: ubuntu-latest
  steps:
    # Fetch the repository so the compose project below is on disk.
    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
    # Run the compose project in `cwd`. The up-flags stop every container as
    # soon as one exits and take the exit code from the `test` service.
    - uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
      # Explicit step name, matching the application and addon quickstart jobs.
      name: Run quickstart self-hosted model example
      with:
        cwd: "./dial-docker-compose/ci/ollama"
        up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

build-and-deploy:
needs: [run-notebooks, run-quickstart]
needs:
- run-notebooks
- run-quickstart-model
- run-quickstart-application
- run-quickstart-addon
- run-quickstart-self-hosted-model
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
Expand Down
7 changes: 5 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@ yarn-error.log*
.venv
__pycache__

# DIAL Core logs
*.log
# Docker container volumes
core-data
core-logs
.ollama

/.quarto/

# Autogenerated files by Quarto
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
## Helm Deployment

* [AI DIAL Generic Installation Simple Guide](https://github.com/epam/ai-dial-helm/tree/main/charts/dial/examples/generic/simple)

## Tutorials

* [Launch AI DIAL Chat with an Azure model](./docs/tutorials/quick-start-model.md)
* [Launch AI DIAL Chat with a self-hosted model](./docs/tutorials/quick-start-with-self-hosted-model.md)
* [Launch AI DIAL Chat with a Sample Application](./docs/tutorials/quick-start-with-application.md)
* [Launch AI DIAL Chat with a Sample Addon](./docs/tutorials/quick-start-with-addon.md)

Expand All @@ -31,7 +32,7 @@
## Configuration

* Refer to [Configuration](./docs/Deployment/configuration.md)

## Other AI DIAL Project Open Source Repositories

Here is the current list of repositories where you can find more details. You can also refer to [repository map](https://epam-rail.com/open-source).
Expand Down
2 changes: 1 addition & 1 deletion dial-docker-compose/addon/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ include:

services:
adapter-openai:
image: epam/ai-dial-adapter-openai:0.11.0
image: epam/ai-dial-adapter-openai:0.14.0
environment:
WEB_CONCURRENCY: "3"

Expand Down
4 changes: 4 additions & 0 deletions dial-docker-compose/ci/ollama/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Environment for the CI run of the self-hosted model quickstart.
# NOTE(review): DIAL_DIR presumably selects the DIAL configuration folder used
# by the included quickstart compose file - confirm against that file.
DIAL_DIR="./ollama"
# Model tags for the chat, vision and embedding deployments.
# NOTE(review): presumably consumed by the ollama-setup service - confirm.
OLLAMA_CHAT_MODEL=llama3.1:8b-instruct-q4_0
OLLAMA_VISION_MODEL=llava-phi3:3.8b-mini-q4_0
OLLAMA_EMBEDDING_MODEL=nomic-embed-text:137m-v1.5-fp16
16 changes: 16 additions & 0 deletions dial-docker-compose/ci/ollama/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# CI wrapper: pulls in the self-hosted model quickstart stack and adds a
# `test` service on top of it.
include:
  - path: ../../ollama/docker-compose.yml
    env_file: ./.env

services:
  test:
    # Image is built from the ./test directory next to this file.
    build: test
    environment:
      DIAL_URL: 'http://core:8080'
      DIAL_API_KEY: 'dial_api_key'
      DIAL_API_VERSION: '2024-02-01'
    # Start only after both services report healthy.
    depends_on:
      ollama-setup:
        condition: service_healthy
      core:
        condition: service_healthy
1 change: 1 addition & 0 deletions dial-docker-compose/ci/ollama/test/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Dockerfile
7 changes: 7 additions & 0 deletions dial-docker-compose/ci/ollama/test/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Image for the CI test script that exercises the DIAL deployments.
FROM python:3.11-alpine

WORKDIR /app
# Copy the whole build context. The destination ends with "/" because the
# Dockerfile reference requires a trailing slash for multi-source copies, and
# "." keeps any directory layout intact whereas "*" flattens subdirectories.
COPY . /app/
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt

CMD ["python", "app.py"]
141 changes: 141 additions & 0 deletions dial-docker-compose/ci/ollama/test/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import base64
import os
from pathlib import Path
from typing import Any
import aiohttp
import asyncio
import backoff

import logging
import time
from contextlib import asynccontextmanager


def get_env(name: str) -> str:
    """Look up a required environment variable.

    Args:
        name: name of the environment variable.

    Returns:
        The variable's value.

    Raises:
        ValueError: if the variable is not set.
    """
    if name not in os.environ:
        raise ValueError(f"'{name}' environment variable must be defined")
    return os.environ[name]


# Connection settings for the DIAL deployment under test. All three are
# required: get_env raises ValueError at import time when one is missing.
DIAL_URL = get_env("DIAL_URL")
DIAL_API_KEY = get_env("DIAL_API_KEY")
DIAL_API_VERSION = get_env("DIAL_API_VERSION")

# Configure root logging at DEBUG so the debug messages below are emitted.
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger(__name__)


@asynccontextmanager
async def timer(name: str):
    """Async context manager that logs how long the wrapped block took.

    Args:
        name: label placed in square brackets in both debug log lines.

    The elapsed time is logged from a ``finally`` clause, so it is reported
    even when the wrapped block raises; the exception still propagates.
    """
    log.debug(f"[{name}] Starting...")
    start = time.perf_counter()
    try:
        yield
    finally:
        # Without try/finally the timing line is lost exactly when a test
        # fails, which is when the duration is most useful for diagnosis.
        elapsed = time.perf_counter() - start
        log.debug(f"[{name}] Executed in {elapsed:.2f} seconds")


@backoff.on_exception(
    backoff.expo,
    (aiohttp.ClientError, aiohttp.ServerTimeoutError),
    max_time=60,
)
async def post_with_retry(url: str, payload: dict, headers: dict, params: dict):
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON response.

    The call is wrapped in ``backoff.on_exception`` with exponential backoff
    (``max_time=60``) for ``aiohttp.ClientError`` and
    ``aiohttp.ServerTimeoutError``. ``raise_for_status`` is called on the
    response before its body is decoded.

    Args:
        url: endpoint to post to.
        payload: request body, sent as JSON.
        headers: HTTP headers for the request.
        params: query-string parameters for the request.
    """
    request_kwargs = {"json": payload, "headers": headers, "params": params}
    # A fresh session is opened for every attempt and closed on exit.
    async with aiohttp.ClientSession() as http:
        async with http.post(url, **request_kwargs) as reply:
            reply.raise_for_status()
            return await reply.json()


def read_image_base64(png_file: Path) -> str:
    """Return the bytes of ``png_file`` encoded as a base64 text string."""
    raw_bytes = png_file.read_bytes()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

async def dial_chat_completion(deployment_id: str, messages: list) -> str:
    """Send a non-streaming chat completion request and return the reply text.

    Args:
        deployment_id: deployment name; used in the URL path and as "model".
        messages: list of chat message dicts, sent unchanged as "messages".

    Returns:
        The "content" of the first choice's message, or "" when that field
        is missing or null.

    Raises:
        ValueError: if the response contains no choices.
    """
    api_url = f"{DIAL_URL}/openai/deployments/{deployment_id}/chat/completions"

    payload = {
        "model": deployment_id,
        "messages": messages,
        "stream": False,
    }
    headers = {"api-key": DIAL_API_KEY}
    params = {"api-version": DIAL_API_VERSION}

    body = await post_with_retry(api_url, payload, headers, params)
    log.debug(f"Response: {body}")

    # Fail with a descriptive message instead of a bare IndexError when the
    # response has no (or an empty) "choices" list.
    choices = body.get("choices") or []
    if not choices:
        raise ValueError(
            f"No choices in the chat completion response for {deployment_id!r}"
        )

    # "message" or "content" may be present but null; normalise to a string
    # so the declared return type holds.
    message = choices[0].get("message") or {}
    content = message.get("content") or ""

    log.debug(f"Content: {content}")

    return content

async def dial_embeddings(deployment_id: str, input: Any) -> list:
    """Request embeddings for ``input`` and return the first embedding vector.

    Args:
        deployment_id: deployment name; used in the URL path and as "model".
        input: value sent unchanged as the "input" field of the request.

    Returns:
        The "embedding" list of the first entry in the response's "data",
        or an empty list when that field is missing or null.

    Raises:
        ValueError: if the response contains no data entries.
    """
    api_url = f"{DIAL_URL}/openai/deployments/{deployment_id}/embeddings"

    payload = {
        "model": deployment_id,
        "input": input,
    }
    headers = {"api-key": DIAL_API_KEY}
    params = {"api-version": DIAL_API_VERSION}

    body = await post_with_retry(api_url, payload, headers, params)
    log.debug(f"Response: {body}")

    # Fail with a descriptive message instead of a bare IndexError when the
    # response has no (or an empty) "data" list.
    data = body.get("data") or []
    if not data:
        raise ValueError(
            f"No data in the embeddings response for {deployment_id!r}"
        )

    embedding = data[0].get("embedding") or []

    log.debug(f"Len embedding vector: {len(embedding)}")

    return embedding

async def test_chat_model(deployment_id: str):
    """Ask the chat deployment a trivial sum and check the reply.

    Args:
        deployment_id: name of the chat deployment to query.

    Raises:
        ValueError: if the reply does not contain "5".
    """
    prompt = "2 + 3 = ? Reply with a single number:"
    reply = await dial_chat_completion(
        deployment_id, [{"role": "user", "content": prompt}]
    )

    if "5" in reply:
        return
    raise ValueError(f"Test failed for {deployment_id!r}")


async def test_vision_model(deployment_id: str):
    """Send ./image.png to the vision deployment and check its description.

    The image is embedded in the request as a base64 ``data:`` URL next to
    a text part asking for a description.

    Args:
        deployment_id: name of the vision deployment to query.

    Raises:
        ValueError: if the reply does not contain "vision" (case-insensitive).
    """
    encoded_png = read_image_base64(Path("./image.png"))
    text_part = {"type": "text", "text": "Describe the image"}
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{encoded_png}"},
    }

    reply = await dial_chat_completion(
        deployment_id,
        [{"role": "user", "content": [text_part, image_part]}],
    )

    if "vision" in reply.lower():
        return
    raise ValueError(f"Test failed for {deployment_id!r}")

async def test_embedding_model(deployment_id: str):
    """Embed the word "cat" and check that a vector of floats comes back.

    Args:
        deployment_id: name of the embedding deployment to query.

    Raises:
        ValueError: if the vector is empty or its first element is not a float.
    """
    vector = await dial_embeddings(deployment_id, "cat")

    looks_valid = len(vector) != 0 and isinstance(vector[0], float)
    if not looks_valid:
        raise ValueError(f"Test failed for {deployment_id!r}")


async def tests():
    """Run the chat, vision and embedding checks one after another.

    Each check runs inside ``timer`` so its duration is logged. The first
    failing check raises and the remaining checks are not run.
    """
    checks = (
        ("Testing chat-model", test_chat_model, "chat-model"),
        ("Testing vision-model", test_vision_model, "vision-model"),
        ("Testing embedding-model", test_embedding_model, "embedding-model"),
    )
    for label, check, deployment_id in checks:
        async with timer(label):
            await check(deployment_id)

if __name__ == "__main__":
    # asyncio.run creates a fresh event loop, runs the coroutine to completion
    # and closes the loop afterwards. Calling asyncio.get_event_loop() with no
    # running loop is deprecated and left the loop unclosed.
    asyncio.run(tests())
Binary file added dial-docker-compose/ci/ollama/test/image.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions dial-docker-compose/ci/ollama/test/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
aiohttp==3.9.4
backoff==2.2.1
6 changes: 3 additions & 3 deletions dial-docker-compose/common.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
services:
themes:
image: epam/ai-dial-chat-themes:0.4.0
image: epam/ai-dial-chat-themes:0.6.0
ports:
- "3001:8080"

chat:
ports:
- "3000:3000"
image: epam/ai-dial-chat:0.10.0
image: epam/ai-dial-chat:0.17.0
depends_on:
- themes
- core
Expand Down Expand Up @@ -36,7 +36,7 @@ services:
user: ${UID:-root}
ports:
- "8080:8080"
image: epam/ai-dial-core:0.9.0
image: epam/ai-dial-core:0.16.0
environment:
'AIDIAL_SETTINGS': '/opt/settings/settings.json'
'JAVA_OPTS': '-Dgflog.config=/opt/settings/gflog.xml'
Expand Down
2 changes: 1 addition & 1 deletion dial-docker-compose/model/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ include:

services:
adapter-openai:
image: epam/ai-dial-adapter-openai:0.11.0
image: epam/ai-dial-adapter-openai:0.14.0
environment:
WEB_CONCURRENCY: "3"
Loading