Skip to content

Commit

Permalink
first cut at redis (#2226)
Browse files Browse the repository at this point in the history
* first cut at redis

* fix startup dependencies on redis

* kombu cleanup - fail silently

* mypy

* add redis_host environment override

* update REDIS_HOST env var in docker-compose.dev.yml

* update the rest of the docker files

* update contributing guide

* renaming cache to cache_volume

* add redis password to various deployments

* try setting up pr testing for helm

* fix indent

* hopefully this release version actually exists

* fix command line option to --chart-dirs

* fetch-depth 0

* edit values.yaml

* try setting ct working directory

* bypass testing only on change for now

* move files and lint them

* update helm testing

* some issues suggest using --config works

* add vespa repo

* add postgresql repo

* increase timeout

* try amd64 runner

* fix redis password reference

* add comment to helm chart testing workflow

* rename helm testing workflow to disable it

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
  • Loading branch information
rkuo-danswer and LostVector authored Sep 6, 2024
1 parent aeb6060 commit 2933c35
Show file tree
Hide file tree
Showing 43 changed files with 268 additions and 23 deletions.
67 changes: 67 additions & 0 deletions .github/workflows/pr-helm-chart-testing.yml.disabled.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# This workflow is intentionally disabled while we're still working on it
# It's close to ready, but a race condition needs to be fixed with
# API server and Vespa startup, and it needs to have a way to build/test against
# local containers

name: Helm - Lint and Test Charts

on:
merge_group:
pull_request:
branches: [ main ]

jobs:
lint-test:
runs-on: Amd64

# fetch-depth 0 is required for helm/chart-testing-action
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Set up Helm
uses: azure/setup-helm@v4.2.0
with:
version: v3.14.4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
cache: 'pip'
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
- run: |
python -m pip install --upgrade pip
pip install -r backend/requirements/default.txt
pip install -r backend/requirements/dev.txt
pip install -r backend/requirements/model_server.txt

- name: Set up chart-testing
uses: helm/chart-testing-action@v2.6.1

- name: Run chart-testing (list-changed)
id: list-changed
run: |
changed=$(ct list-changed --target-branch ${{ github.event.repository.default_branch }})
if [[ -n "$changed" ]]; then
echo "changed=true" >> "$GITHUB_OUTPUT"
fi

- name: Run chart-testing (lint)
# if: steps.list-changed.outputs.changed == 'true'
run: ct lint --all --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}

- name: Create kind cluster
# if: steps.list-changed.outputs.changed == 'true'
uses: helm/kind-action@v1.10.0

- name: Run chart-testing (install)
# if: steps.list-changed.outputs.changed == 'true'
run: ct install --all --config ct.yaml
# run: ct install --target-branch ${{ github.event.repository.default_branch }}

1 change: 1 addition & 0 deletions .github/workflows/run-it.yml
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ jobs:
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
danswer/integration-test-runner:it
Expand Down
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,9 @@ ensure it is running before continuing with the following docker commands.

First navigate to `danswer/deployment/docker_compose`, then start up Vespa and Postgres with:
```bash
docker compose -f docker-compose.dev.yml -p danswer-stack up -d index relational_db
docker compose -f docker-compose.dev.yml -p danswer-stack up -d index relational_db cache
```
(index refers to Vespa and relational_db refers to Postgres)
(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)

#### Running Danswer
To start the frontend, navigate to `danswer/web` and run:
Expand Down
31 changes: 24 additions & 7 deletions backend/danswer/background/celery/celery_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from celery import Celery # type: ignore
from celery.contrib.abortable import AbortableTask # type: ignore
from celery.exceptions import TaskRevokedError
from sqlalchemy import inspect
from sqlalchemy import text
from sqlalchemy.orm import Session

Expand All @@ -20,7 +21,10 @@
from danswer.background.task_utils import name_cc_prune_task
from danswer.background.task_utils import name_document_set_sync_task
from danswer.configs.app_configs import JOB_TIMEOUT
from danswer.configs.constants import POSTGRES_CELERY_APP_NAME
from danswer.configs.app_configs import REDIS_DB_NUMBER_CELERY
from danswer.configs.app_configs import REDIS_HOST
from danswer.configs.app_configs import REDIS_PASSWORD
from danswer.configs.app_configs import REDIS_PORT
from danswer.configs.constants import PostgresAdvisoryLocks
from danswer.connectors.factory import instantiate_connector
from danswer.connectors.models import InputType
Expand All @@ -35,9 +39,7 @@
from danswer.db.document_set import fetch_documents_for_document_set_paginated
from danswer.db.document_set import get_document_set_by_id
from danswer.db.document_set import mark_document_set_as_synced
from danswer.db.engine import build_connection_string
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.engine import SYNC_DB_API
from danswer.db.models import DocumentSet
from danswer.document_index.document_index_utils import get_both_index_names
from danswer.document_index.factory import get_default_document_index
Expand All @@ -46,11 +48,17 @@

logger = setup_logger()

connection_string = build_connection_string(
db_api=SYNC_DB_API, app_name=POSTGRES_CELERY_APP_NAME
CELERY_PASSWORD_PART = ""
if REDIS_PASSWORD:
CELERY_PASSWORD_PART = f":{REDIS_PASSWORD}@"

# example celery_broker_url: "redis://:password@localhost:6379/15"
celery_broker_url = (
f"redis://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY}"
)
celery_backend_url = (
f"redis://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY}"
)
celery_broker_url = f"sqla+{connection_string}"
celery_backend_url = f"db+{connection_string}"
celery_app = Celery(__name__, broker=celery_broker_url, backend=celery_backend_url)


Expand Down Expand Up @@ -360,6 +368,15 @@ def kombu_message_cleanup_task_helper(ctx: dict, db_session: Session) -> bool:
bool: Returns True if there are more rows to process, False if not.
"""

inspector = inspect(db_session.bind)
if not inspector:
return False

# With the move to redis as celery's broker and backend, kombu tables may not even exist.
# We can fail silently.
if not inspector.has_table("kombu_message"):
return False

query = text(
"""
SELECT id, timestamp, payload
Expand Down
10 changes: 10 additions & 0 deletions backend/danswer/configs/app_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,16 @@
except ValueError:
POSTGRES_POOL_RECYCLE = POSTGRES_POOL_RECYCLE_DEFAULT

REDIS_HOST = os.environ.get("REDIS_HOST") or "localhost"
REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379))
REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD") or ""

# Used for general redis things
REDIS_DB_NUMBER = int(os.environ.get("REDIS_DB_NUMBER", 0))

# Used by celery as broker and backend
REDIS_DB_NUMBER_CELERY = int(os.environ.get("REDIS_DB_NUMBER_CELERY", 15))

#####
# Connector Configs
#####
Expand Down
1 change: 1 addition & 0 deletions backend/requirements/default.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ python-docx==1.1.0
python-dotenv==1.0.0
python-multipart==0.0.7
pywikibot==9.0.0
redis==5.0.8
requests==2.32.2
requests-oauthlib==1.3.1
retry==0.9.2 # This pulls in py which is in CVE-2022-42969, must remove py from image
Expand Down
12 changes: 12 additions & 0 deletions ct.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# See https://github.com/helm/chart-testing#configuration

chart-dirs:
- deployment/helm/charts

chart-repos:
- vespa=https://unoplat.github.io/vespa-helm-charts
- postgresql=https://charts.bitnami.com/bitnami

helm-extra-args: --timeout 900s

validate-maintainers: false
18 changes: 16 additions & 2 deletions deployment/docker_compose/docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ services:
depends_on:
- relational_db
- index
- cache
- inference_model_server
restart: always
ports:
Expand Down Expand Up @@ -62,6 +63,7 @@ services:
# Other services
- POSTGRES_HOST=relational_db
- VESPA_HOST=index
- REDIS_HOST=cache
- WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose
# Don't change the NLP model configs unless you know what you're doing
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
Expand Down Expand Up @@ -107,6 +109,7 @@ services:
depends_on:
- relational_db
- index
- cache
- inference_model_server
- indexing_model_server
restart: always
Expand Down Expand Up @@ -137,6 +140,7 @@ services:
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-}
- POSTGRES_DB=${POSTGRES_DB:-}
- VESPA_HOST=index
- REDIS_HOST=cache
- WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose for OAuth2 connectors
# Don't change the NLP model configs unless you know what you're doing
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
Expand Down Expand Up @@ -330,9 +334,19 @@ services:
# in order to make this work on both Unix-like systems and windows
command: >
/bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh
&& /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev"
&& /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev"
cache:
image: redis:7.4-alpine
restart: always
ports:
- '6379:6379'
command: redis-server
volumes:
- cache_volume:/data

volumes:
cache_volume:
db_volume:
vespa_volume: # Created by the container itself

Expand Down
17 changes: 16 additions & 1 deletion deployment/docker_compose/docker-compose.gpu-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ services:
depends_on:
- relational_db
- index
- cache
- inference_model_server
restart: always
ports:
Expand Down Expand Up @@ -58,6 +59,7 @@ services:
# Other services
- POSTGRES_HOST=relational_db
- VESPA_HOST=index
- REDIS_HOST=cache
- WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose
# Don't change the NLP model configs unless you know what you're doing
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
Expand Down Expand Up @@ -99,6 +101,7 @@ services:
depends_on:
- relational_db
- index
- cache
- inference_model_server
- indexing_model_server
restart: always
Expand Down Expand Up @@ -129,6 +132,7 @@ services:
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-}
- POSTGRES_DB=${POSTGRES_DB:-}
- VESPA_HOST=index
- REDIS_HOST=cache
- WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose for OAuth2 connectors
# Don't change the NLP model configs unless you know what you're doing
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
Expand Down Expand Up @@ -341,9 +345,20 @@ services:
command: >
/bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh
&& /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev"
cache:
image: redis:7.4-alpine
restart: always
ports:
- '6379:6379'
command: redis-server
volumes:
- cache_volume:/data


volumes:
cache_volume:
db_volume:
vespa_volume:
# Created by the container itself
Expand Down
15 changes: 15 additions & 0 deletions deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ services:
depends_on:
- relational_db
- index
- cache
- inference_model_server
restart: always
env_file:
Expand All @@ -20,6 +21,7 @@ services:
- AUTH_TYPE=${AUTH_TYPE:-oidc}
- POSTGRES_HOST=relational_db
- VESPA_HOST=index
- REDIS_HOST=cache
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
extra_hosts:
- "host.docker.internal:host-gateway"
Expand All @@ -39,6 +41,7 @@ services:
depends_on:
- relational_db
- index
- cache
- inference_model_server
- indexing_model_server
restart: always
Expand All @@ -48,6 +51,7 @@ services:
- AUTH_TYPE=${AUTH_TYPE:-oidc}
- POSTGRES_HOST=relational_db
- VESPA_HOST=index
- REDIS_HOST=cache
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
- INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
extra_hosts:
Expand Down Expand Up @@ -204,7 +208,18 @@ services:
- .env.nginx


cache:
image: redis:7.4-alpine
restart: always
ports:
- '6379:6379'
command: redis-server
volumes:
- cache_volume:/data


volumes:
cache_volume:
db_volume:
vespa_volume:
# Created by the container itself
Expand Down
Loading

0 comments on commit 2933c35

Please sign in to comment.