Skip to content

Commit

Permalink
feat: added docker-compose and test for llama3:8b via ollama
Browse files Browse the repository at this point in the history
  • Loading branch information
adubovik committed Sep 16, 2024
1 parent 9bb674f commit a96274c
Show file tree
Hide file tree
Showing 17 changed files with 196 additions and 9 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ jobs:
with:
cwd: "./dial-docker-compose/ci/addon"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"
- uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
name: Run quickstart ollama example
with:
cwd: "./dial-docker-compose/ci/ollama"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

build:
needs: [run-notebooks, run-quickstart]
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ jobs:
with:
cwd: "./dial-docker-compose/ci/addon"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"
- uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
name: Run quickstart ollama example
with:
cwd: "./dial-docker-compose/ci/ollama"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

build-and-deploy:
needs: [run-notebooks, run-quickstart]
Expand Down
7 changes: 5 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@ yarn-error.log*
.venv
__pycache__

# DIAL Core logs
*.log
# Docker container volumes
core-data
core-logs
.ollama

/.quarto/

# Autogenerated files by Quarto
Expand Down
2 changes: 1 addition & 1 deletion dial-docker-compose/addon/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ include:

services:
adapter-openai:
image: epam/ai-dial-adapter-openai:0.11.0
image: epam/ai-dial-adapter-openai:0.14.0
environment:
WEB_CONCURRENCY: "3"

Expand Down
1 change: 1 addition & 0 deletions dial-docker-compose/ci/ollama/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
DIAL_DIR="./ollama"
17 changes: 17 additions & 0 deletions dial-docker-compose/ci/ollama/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# CI variant of the Ollama quickstart: pulls in the regular Ollama stack and
# adds a `test` container that exercises the deployed model through DIAL Core.
include:
  - path: '../../ollama/docker-compose.yml'
    env_file: './.env'

services:
  test:
    # Image is built from the ./test directory next to this file.
    build: 'test'
    environment:
      DIAL_URL: 'http://core:8080'
      DIAL_API_KEY: 'dial_api_key'
      DIAL_API_VERSION: '2024-02-01'
      # Quoted on purpose: the value contains a colon.
      DIAL_DEPLOYMENT: 'llama3:8b'
    # The test starts only after both dependencies report healthy.
    depends_on:
      ollama-setup:
        condition: service_healthy
      core:
        condition: service_healthy
1 change: 1 addition & 0 deletions dial-docker-compose/ci/ollama/test/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Dockerfile
7 changes: 7 additions & 0 deletions dial-docker-compose/ci/ollama/test/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Image for the Ollama quickstart smoke test (runs app.py once and exits).
FROM python:3.11-alpine

WORKDIR /app

# Install dependencies first so this layer is reused from cache when only
# the test script changes. --no-cache-dir keeps pip's cache out of the image.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# A wildcard source can match several files, so the destination must be a
# directory path ending with a slash.
COPY * /app/

CMD ["python", "app.py"]
66 changes: 66 additions & 0 deletions dial-docker-compose/ci/ollama/test/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
import aiohttp
import asyncio
import backoff

import logging


def get_env(name: str) -> str:
    """Return the value of the environment variable ``name``.

    Raises:
        ValueError: if the variable is not present in the environment.
    """
    if name in os.environ:
        return os.environ[name]
    raise ValueError(f"'{name}' environment variable must be defined")


# Connection settings for the deployment under test. All four variables are
# mandatory; get_env raises ValueError for the first one that is missing.
DIAL_URL, DIAL_API_KEY, DIAL_API_VERSION, DIAL_DEPLOYMENT = (
    get_env(variable)
    for variable in (
        "DIAL_URL",
        "DIAL_API_KEY",
        "DIAL_API_VERSION",
        "DIAL_DEPLOYMENT",
    )
)

# DEBUG level so the response body logged by the test is visible.
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger(__name__)


@backoff.on_exception(
    backoff.expo,
    (aiohttp.ClientError, aiohttp.ServerTimeoutError),
    max_time=60,
)
async def post_with_retry(url: str, payload: dict, headers: dict, params: dict):
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON body.

    A fresh client session is opened for every attempt. The ``backoff``
    decorator re-invokes this coroutine with exponential delays when one of
    the listed aiohttp exceptions is raised, bounded by ``max_time=60``.
    ``raise_for_status()`` is called on the response before the body is read.
    """
    request_kwargs = {"json": payload, "headers": headers, "params": params}
    async with aiohttp.ClientSession() as http:
        async with http.post(url, **request_kwargs) as reply:
            reply.raise_for_status()
            return await reply.json()


async def test_model(deployment_id: str):
    """Smoke-test a chat deployment with a trivial arithmetic prompt.

    Sends one non-streaming chat-completion request for ``deployment_id``
    to DIAL and checks that the reply text contains ``35``.

    Raises:
        ValueError: if the response carries no choices, or the reply text
            does not contain "35".
    """
    api_url = f"{DIAL_URL}/openai/deployments/{deployment_id}/chat/completions"

    message = "12 + 23 = ? Reply with a single number:"
    payload = {
        "model": deployment_id,
        "messages": [{"role": "user", "content": message}],
        "stream": False,
    }
    headers = {"api-key": DIAL_API_KEY}
    params = {"api-version": DIAL_API_VERSION}

    body = await post_with_retry(api_url, payload, headers, params)
    log.debug(f"Response: {body}")

    # An empty or missing "choices" list is a test failure, not an IndexError.
    choices = body.get("choices") or []
    if not choices:
        raise ValueError(
            f"Test failed for {deployment_id!r}. "
            f"No choices in the response: {body!r}"
        )

    # "message" or "content" may be present but null; coerce to "" so the
    # substring check below cannot raise TypeError.
    content = (choices[0].get("message") or {}).get("content") or ""

    if "35" not in content:
        raise ValueError(
            f"Test failed for {deployment_id!r}. "
            f"Expected '35' in the reply, got: {content!r}"
        )


async def tests():
    """Run the smoke tests; currently only the deployment named by DIAL_DEPLOYMENT."""
    deployment = DIAL_DEPLOYMENT
    await test_model(deployment)


if __name__ == "__main__":
    # asyncio.run creates a new event loop, runs the coroutine to completion
    # and closes the loop afterwards. It replaces the deprecated
    # get_event_loop()/run_until_complete pair, which never closed the loop.
    asyncio.run(tests())
2 changes: 2 additions & 0 deletions dial-docker-compose/ci/ollama/test/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
aiohttp==3.9.4
backoff==2.2.1
6 changes: 3 additions & 3 deletions dial-docker-compose/common.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
services:
themes:
image: epam/ai-dial-chat-themes:0.4.0
image: epam/ai-dial-chat-themes:0.6.0
ports:
- "3001:8080"

chat:
ports:
- "3000:3000"
image: epam/ai-dial-chat:0.10.0
image: epam/ai-dial-chat:0.17.0
depends_on:
- themes
- core
Expand Down Expand Up @@ -36,7 +36,7 @@ services:
user: ${UID:-root}
ports:
- "8080:8080"
image: epam/ai-dial-core:0.9.0
image: epam/ai-dial-core:0.16.0
environment:
'AIDIAL_SETTINGS': '/opt/settings/settings.json'
'JAVA_OPTS': '-Dgflog.config=/opt/settings/gflog.xml'
Expand Down
2 changes: 1 addition & 1 deletion dial-docker-compose/model/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ include:

services:
adapter-openai:
image: epam/ai-dial-adapter-openai:0.11.0
image: epam/ai-dial-adapter-openai:0.14.0
environment:
WEB_CONCURRENCY: "3"
1 change: 1 addition & 0 deletions dial-docker-compose/ollama/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
DIAL_DIR="./ollama"
23 changes: 23 additions & 0 deletions dial-docker-compose/ollama/core/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"routes": {},
"models": {
"llama3:8b": {
"type": "chat",
"displayName": "Llama3 8B (Locally hosted)",
"endpoint": "http://ollama:11434/v1/chat/completions"
}
},
"keys": {
"dial_api_key": {
"project": "TEST-PROJECT",
"role": "default"
}
},
"roles": {
"default": {
"limits": {
"llama3:8b": {}
}
}
}
}
30 changes: 30 additions & 0 deletions dial-docker-compose/ollama/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Ollama quickstart: the common DIAL stack plus a local Ollama server and a
# one-shot helper container that pulls and aliases the model.
include:
  - path: ../common.yml
    env_file: ./.env

services:
  ollama:
    image: ollama/ollama:0.3.10
    volumes:
      # Keeps downloaded models on the host between runs.
      - ./.ollama:/root/.ollama
    ports:
      - "11434:11434"

  ollama-setup:
    depends_on:
      ollama:
        condition: service_started
    image: alpine:3.20.3
    environment:
      - OLLAMA_URL=http://ollama:11434
      - OLLAMA_ENABLED=${OLLAMA_ENABLED:-1}
      - OLLAMA_MODEL=${OLLAMA_MODEL:-llama3:8b-instruct-q2_K}
      - OLLAMA_MODEL_ALIAS=${OLLAMA_MODEL_ALIAS:-llama3:8b}
    volumes:
      - ./ollama_setup.sh:/setup.sh
    command: sh /setup.sh
    healthcheck:
      # setup.sh creates /healthy only after the model is pulled, aliased
      # and loaded, so this check doubles as a "setup finished" signal.
      test: ["CMD", "test", "-f", "/healthy"]
      interval: 10s
      # Failed probes during the start period do not count towards
      # `retries`, and the first successful probe still marks the container
      # healthy immediately. The window must cover the model download; the
      # previous 10s left only about 110s before dependants failed.
      start_period: 30m
      retries: 10
26 changes: 26 additions & 0 deletions dial-docker-compose/ollama/ollama_setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/sh
# Prepares the Ollama server for DIAL: waits for the server, pulls
# $OLLAMA_MODEL, copies it to $OLLAMA_MODEL_ALIAS and loads the alias into
# memory. Creates /healthy when finished, which the container healthcheck
# tests for, then idles so the container stays up.
set -e

if [ "$OLLAMA_ENABLED" -eq 1 ]; then
    apk add --no-cache curl

    # Loop until curl gets a response from the server.
    until curl -s "$OLLAMA_URL"; do
        echo "Waiting for Ollama..."
        sleep 5
    done

    echo "Pulling $OLLAMA_MODEL..."
    curl -vL --fail-with-body "$OLLAMA_URL/api/pull" -d "{\"name\": \"$OLLAMA_MODEL\", \"stream\": false}"

    echo "Making alias for $OLLAMA_MODEL: $OLLAMA_MODEL_ALIAS..."
    curl -vL --fail-with-body "$OLLAMA_URL/api/copy" -d "{\"source\": \"$OLLAMA_MODEL\", \"destination\": \"$OLLAMA_MODEL_ALIAS\"}"

    echo "Loading the model into memory..."
    curl -vL --fail-with-body "$OLLAMA_URL/api/generate" -d "{\"model\": \"$OLLAMA_MODEL_ALIAS\"}"
else
    echo "Ollama is disabled"
fi

# Marker file tested by the container healthcheck.
touch /healthy

# Keep the container running so its healthy state stays observable.
tail -f /dev/null
4 changes: 2 additions & 2 deletions dial-docker-compose/settings/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
}
},
"encryption": {
"salt": "salt",
"password": "password"
"secret": "salt",
"key": "password"
}
}

0 comments on commit a96274c

Please sign in to comment.