Skip to content

Commit

Permalink
add monitoring to validator
Browse files Browse the repository at this point in the history
  • Loading branch information
mpnowacki-reef committed Nov 3, 2024
1 parent 613e28e commit f2b9509
Showing 1 changed file with 88 additions and 4 deletions.
92 changes: 88 additions & 4 deletions validator/envs/runner/data/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ services:

app:
image: backenddevelopersltd/${VALIDATOR_IMAGE_REPO}:v0-latest
pull_policy: always
pull_policy: ${PULL_POLICY:-always}
healthcheck:
test: wget -q --spider 127.0.0.1:8000/admin/login/ || exit 1
init: true
Expand All @@ -50,7 +50,7 @@ services:

celery-worker-default: &celery
image: backenddevelopersltd/${VALIDATOR_IMAGE_REPO}:v0-latest
pull_policy: always
pull_policy: ${PULL_POLICY:-always}
init: true
healthcheck:
test: celery -A compute_horde_validator status > /dev/null || exit 1
Expand Down Expand Up @@ -96,9 +96,93 @@ services:
environment:
- PROMETHEUS_MULTIPROC_DIR=/prometheus-multiproc-dir/celery-worker-receipts

# cadvisor: per-container resource metrics exporter. The prometheus service
# in this file scrapes it at cadvisor:8080.
cadvisor:
  image: gcr.io/cadvisor/cadvisor:v0.40.0
  restart: unless-stopped
  # Host paths are mounted read-only so the container can inspect them.
  volumes:
    - /:/rootfs:ro
    - /var/run:/var/run:ro
    - /sys:/sys:ro
    - /var/lib/docker:/var/lib/docker:ro
    - /cgroup:/cgroup:ro
  # Pass the host kernel message device through to the container.
  devices:
    - /dev/kmsg:/dev/kmsg
  # Reuse the logging options shared by the other services in this file.
  logging:
    <<: *logging

# node-exporter: host-level metrics exporter. It shares the host's network and
# PID namespaces; presumably it is the 'host.docker.internal:9100' target that
# the prometheus service scrapes - confirm against the exporter's listen port.
node-exporter:
  image: prom/node-exporter:latest
  network_mode: host
  pid: host
  restart: unless-stopped
  volumes:
    # Host root filesystem, read-only; handed to the exporter via --path.rootfs.
    - /:/host:ro,rslave
  command:
    - '--path.rootfs=/host'
    # `$$` is Compose's escape for a literal `$`, so the exporter receives
    # `($|/)` at the end of this regex.
    - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc|run|boot|var/.+)($$|/)'
    - '--collector.tcpstat'
  # Reuse the logging options shared by the other services in this file.
  logging:
    <<: *logging

# prometheus: local Prometheus that scrapes the validator stack and forwards
# samples to the prometheus-proxy service via remote_write.
prometheus:
  image: backenddevelopersltd/bittensor_prometheus:latest
  restart: unless-stopped
  links:
    - cadvisor
    - app
    - prometheus-proxy
    - celery-flower
  env_file: ./.env
  volumes:
    - ${HOST_WALLET_DIR}:/wallets
  # The entrypoint writes a config *template* and then hands over to the
  # image's /entrypoint.sh. `{hotkey}` is a placeholder, presumably filled in
  # by /entrypoint.sh using the mounted wallet - confirm against the image.
  #
  # NOTE: the whole script is wrapped in single quotes for `sh -c`, so the
  # inner '...' pairs below close and reopen that quoting and are stripped
  # before the file is written. This only works because none of the inner
  # quoted values contain whitespace; keep it that way when editing.
  entrypoint: |
    /bin/sh -c 'cat <<EOF > /etc/prometheus/prometheus.yml.template
    global:
      scrape_interval: 30s
    scrape_configs:
      - job_name: 'validator'
        static_configs:
          - targets: ['cadvisor:8080', 'host.docker.internal:9100', 'app:8000', 'celery-flower:5555']
            labels:
              hotkey: '{hotkey}'
    remote_write:
      - url: "http://prometheus-proxy:8000/prometheus_outbound_proxy/"
    EOF
    exec /entrypoint.sh --log.level=debug --storage.tsdb.retention.size=1GB'
  # Lets the container reach services bound on the host (used by the
  # host.docker.internal:9100 scrape target above).
  extra_hosts:
    - "host.docker.internal:host-gateway"
  # Prometheus runs with --log.level=debug, so apply the same shared logging
  # options the other services in this file use.
  logging:
    <<: *logging

# prometheus-proxy: receives remote_write traffic from the local prometheus
# service (http://prometheus-proxy:8000/prometheus_outbound_proxy/).
# CENTRAL_PROMETHEUS_PROXY_URL presumably names the upstream it forwards to -
# confirm against the image's documentation.
prometheus-proxy:
  image: backenddevelopersltd/bittensor-prometheus-proxy:latest
  restart: unless-stopped
  environment:
    - CENTRAL_PROMETHEUS_PROXY_URL=https://prometheus.bactensor.io
    - ENV=prometheus-proxy-prod
    # The remaining values are interpolated by Compose from the caller's
    # environment / .env file.
    - SECRET_KEY=${SECRET_KEY}
    - BITTENSOR_WALLET_NAME=${BITTENSOR_WALLET_NAME}
    - BITTENSOR_WALLET_HOTKEY_NAME=${BITTENSOR_WALLET_HOTKEY_NAME}
    - SENTRY_DSN=${SENTRY_DSN}
  volumes:
    # Host wallet directory exposed at the container's bittensor wallet path.
    - ${HOST_WALLET_DIR}:/root/.bittensor/wallets
  # Same shared logging options as the other services in this file.
  logging:
    <<: *logging

# celery-flower: Flower monitoring UI/exporter for the validator's Celery
# workers. The prometheus service scrapes it at celery-flower:5555.
celery-flower:
  image: backenddevelopersltd/${VALIDATOR_IMAGE_REPO}:v0-latest
  # Same pull policy as the other services built from this floating tag, so
  # Flower is not left on a stale image while they are refreshed.
  pull_policy: ${PULL_POLICY:-always}
  healthcheck:
    # Healthy once Flower answers HTTP on its port.
    test: wget -qO- 127.0.0.1:5555 > /dev/null || exit 1
  init: true
  restart: unless-stopped
  env_file: ./.env
  environment:
    - DEBUG=off
    # Histogram bucket boundaries for Flower's task-runtime metric.
    - FLOWER_TASK_RUNTIME_METRIC_BUCKETS=1,2,3,5,10,20,30,45,60,120,180,240,300,600,inf
  command: celery --app=compute_horde_validator --broker="redis://redis:6379/0" flower
  # Reuse the logging options shared by the other services in this file.
  logging:
    <<: *logging

celery-beat:
image: backenddevelopersltd/${VALIDATOR_IMAGE_REPO}:v0-latest
pull_policy: always
pull_policy: ${PULL_POLICY:-always}
init: true
restart: unless-stopped
env_file: ./.env
Expand All @@ -115,7 +199,7 @@ services:

connect-facilitator:
image: backenddevelopersltd/${VALIDATOR_IMAGE_REPO}:v0-latest
pull_policy: always
pull_policy: ${PULL_POLICY:-always}
init: true
restart: unless-stopped
env_file: ./.env
Expand Down

0 comments on commit f2b9509

Please sign in to comment.