# LLaMA backend docker-compose configuration.
# Intended to be used in tandem with the root configuration file. Example:
#
# docker-compose -f docker-compose.yml -f docker-compose.backend-llama.yml
#
# Assumes tokenizer and model files are added to backend-llama/resources-not-tracked-by-git before image build.
# May be used in tandem with docker-compose.backend-llama.bind.yml to bind-mount a directory containing tokenizer and
# model files instead. Example:
#
# SANDLE_LLAMA_BIND_SRC_DIR=/brtx/602-nvme2/oweller2/llama docker-compose \
#     -f docker-compose.yml -f docker-compose.backend-llama.yml -f docker-compose.backend-llama.bind.yml
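#
# For reference, a minimal sketch of what docker-compose.backend-llama.bind.yml
# might contain (an assumption, not this repository's actual file; the mount
# target /opt/llama mirrors the model directory passed to the container
# command below):
#
#     services:
#       backend-llama:
#         volumes:
#           - ${SANDLE_LLAMA_BIND_SRC_DIR}:/opt/llama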
version: '3'
services:
  openai-wrapper:
    environment:
      - SANDLE_BACKEND_LLAMA=http://backend-llama:8000
    depends_on:
      - backend-llama
  backend-llama:
    build:
      context: backend-llama
      args:
        - SENTRY_DSN
        - SENTRY_RELEASE=${APP_NAME}@${APP_VERSION}
    command:
      - /opt/llama
      - ${SANDLE_LLAMA_MODEL_SIZE:-7B}
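    # The two command arguments are presumably the model directory inside the
    # container and the model size to load (an assumption inferred from the
    # values; the entrypoint itself is defined by the backend-llama image).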
    environment:
      - SANDLE_LOG_LEVEL
      - SENTRY_ENVIRONMENT=${SENTRY_ENVIRONMENT:-development}
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]
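
# The device reservation above requests any available GPU. If pinning to a
# single NVIDIA device is desired, standard Compose syntax allows a more
# explicit reservation (a sketch, not part of this file):
#
#     devices:
#       - driver: nvidia
#         count: 1
#         capabilities: [gpu]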