diff --git a/.github/workflows/test_api.yml b/.github/workflows/test_api.yml index 131dde1..fd9d5a3 100644 --- a/.github/workflows/test_api.yml +++ b/.github/workflows/test_api.yml @@ -5,12 +5,12 @@ on: paths: - "nesis/api/core/**" - "nesis/api/tests/**" - - "nesis/api/core/requirements*" + - "nesis/api/requirements*" pull_request: paths: - "nesis/api/core/**" - "nesis/api/tests/**" - - "nesis/api/core/requirements*" + - "nesis/api/requirements*" jobs: linter: diff --git a/.github/workflows/test_rag.yml b/.github/workflows/test_rag.yml index 1c177bd..fe064ce 100644 --- a/.github/workflows/test_rag.yml +++ b/.github/workflows/test_rag.yml @@ -5,12 +5,14 @@ on: paths: - "nesis/rag/core/**" - "nesis/rag/tests/**" - - "nesis/rag/core/requirements*" + - "nesis/rag/requirements*" + - "nesis/rag/settings.yaml" pull_request: paths: - "nesis/rag/core/**" - "nesis/rag/tests/**" - - "nesis/rag/core/requirements*" + - "nesis/rag/requirements*" + - "nesis/rag/settings.yaml" jobs: linter: @@ -27,6 +29,8 @@ jobs: test: runs-on: ubuntu-latest name: Test RAG API + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} services: postgres: image: ametnes/postgresql:16-debian-12 diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 8cd2cc0..d505e0c 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -35,7 +35,10 @@ markdown_extensions: nav: - Home: 'index.md' - 'Quick Start': 'quick-start.md' - - 'Deployment': 'deployment.md' + - 'Installing': + - 'installing/compose.md' + - 'installing/helm.md' + - 'installing/ametnes.md' - 'Access Control': 'rbac.md' - 'Development Guide': - 'Local Development': 'dev-guide/local.md' diff --git a/docs/src/deployment.md b/docs/src/deployment.md deleted file mode 100644 index b62b4c6..0000000 --- a/docs/src/deployment.md +++ /dev/null @@ -1,9 +0,0 @@ -# Deploying Nesis -Nesis has been built around cloud native container deployment. 
-You have multiple deployment options for Nesis however they all - -## Docker Compose - -## Helm - -## Ametnes Platform diff --git a/docs/src/dev-guide/local.md b/docs/src/dev-guide/local.md index a90d7d6..e0417ca 100644 --- a/docs/src/dev-guide/local.md +++ b/docs/src/dev-guide/local.md @@ -10,14 +10,15 @@ get an overview of the components that make up Nesis and its architecture [here] 2. If you rather not install docker, you will need to have access to a Postgres and Memcached instance. 3. _Optional:_ The RAG Engine needs access to an LLM endpoint such as an OpenAI's endpoint or a private LLM endpoint in order to start querying your documents. You will need to set the `OPENAI_API_KEY` and the `OPENAI_API_BASE`environment variables. -4. You need to have python 3.11 for the API and RAG Engine microservices. -5. You also need to have node and npm installed. +4. Recently, Huggingface requires a `HF_TOKEN` to download embedding models. You may need to obtain and set your `HF_TOKEN`. +5. You need to have python 3.11 for the API and RAG Engine microservices. +6. You also need to have node and npm installed. !!! note "A word on vector databases" Nesis' RAG Engine requires a vector database to store vector embeddings. In order to contain the number of - dependant services, we use pgvector packaged into an extended Bitnami Postgres docker image `ametnes/postgresql:16-debian-12` + components, we use pgvector packaged into an extended Bitnami Postgres docker image `ametnes/postgresql:16-debian-12` [here](https://github.com/ametnes/postgresql){target="_blank"}. You are however free to use other vector databases. Curently, we support `chromadb` and `qdrant`. diff --git a/docs/src/index.md b/docs/src/index.md index 54c4eff..4ada00f 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -7,7 +7,6 @@ Test Frontend Test Frontend Test Frontend - Test Helm License

@@ -24,4 +23,4 @@ collected from different documents in multiple formats such as pdf, docx, xlsx a # Demo - \ No newline at end of file + diff --git a/docs/src/installing/ametnes.md b/docs/src/installing/ametnes.md new file mode 100644 index 0000000..182ec2d --- /dev/null +++ b/docs/src/installing/ametnes.md @@ -0,0 +1,43 @@ +# Ametnes Platform + +The Ametnes Platform helps orchestrate your business applications wherever you host them. This can be in your private +data-center, in AWS, GCP or Azure. + +Nesis is available on the Ametnes Platform and can be deployed in your kubernetes cluster wherever you host it. + +The first step is to set up your kubernetes cluster as an Ametnes Application Location. See these detailed instructions. + +## Create the service + +Log into your Ametnes Cloud console at here +or sign up here if you do not have one. + +1. Using the **Services** left menu, navigate to the service management dashboard. +2. Click **New Service**. +3. Enter **__Nesis__** to filter from the list and select **__Create__** +4. In the displayed form, enter the following info. + 1. Enter the **Name**: `Nesis-Service-DSL1` and **Description**: `Nesis-Service-DSL1`. + 2. Select a **Version** from the list. + 3. Select the **Location**. + 4. Set the `OPENAI_API_KEY` and the `HF_TOKEN` keys. + 5. Click `Create`. + +## Test connectivity + +1. Using the **Services** left menu, navigate to the service management dashboard. Your service should be listed. + + !!! note "Service not showing" + If your service list is empty, use the filter at the top right corner, to filter for ALL services. + +2. After a while, your data service status will change to `ready`. +3. To the right of your service, click the `Admin` button and you will navigate to your service's details page. +4. At the bottom of the page, copy the endpoint of your service as well as your username/key and password. +5. In your browser, paste the URL `https://<endpoint>/`. +6. You should get a prompt to login. 
+ +## Clean up + +### Delete all services +1. In your Ametnes Cloud console, navigate to the **Admin** section of each service +2. Delete the service. + diff --git a/docs/src/installing/compose.md b/docs/src/installing/compose.md new file mode 100644 index 0000000..7296566 --- /dev/null +++ b/docs/src/installing/compose.md @@ -0,0 +1,164 @@ +# Docker Compose +For your quick local test, a docker compose file is provided which you can +run to standup a local instance of Nesis. + +## Compose File + +```yaml title="compose.yml" linenums="1" + +version: '3' + + +networks: + nesis: + driver: overlay + attachable: true + +services: + + nesis_frontend: + image: ametnes/nesis:latest-frontend + ports: + - "58000:8000" + environment: + API_URL: http://nesis_api:6000 + networks: + - nesis + depends_on: + - nesis_api + nesis_api: + image: ametnes/nesis:latest-api + ports: + - "56000:6000" + environment: + NESIS_API_PORT: "6000" + NESIS_API_DATABASE_URL: "postgresql://postgres:password@database:5432/nesis" + NESIS_API_TASKS_JOB_STORES_URL: "postgresql://postgres:password@database:5432/nesis" + NESIS_ADMIN_EMAIL: "some.email@domain.com" + NESIS_ADMIN_PASSWORD: "password" + NESIS_MEMCACHE_HOSTS: memcached:11211 + NESIS_API_RAG_ENDPOINT: http://nesis_rag:8080 + networks: + - nesis + depends_on: + database: + condition: service_healthy + links: + - database + - memcached + - nesis_rag + nesis_rag: + image: ametnes/nesis:latest-rag + ports: + - "58080:8080" + environment: + NESIS_RAG_SERVER_PORT: "8080" + NESIS_RAG_PGVECTOR_URL: postgresql://postgres:password@database:5432/nesis + HF_TOKEN: + # 1. local mode uses hugging face. Other options + NESIS_RAG_EMBEDDING_MODE: local + NESIS_RAG_EMBEDDING_DIMENSIONS: "384" + + # 2. openai - for OpenAI Embeddings + # NESIS_RAG_EMBEDDING_MODE: openai + # NESIS_RAG_EMBEDDING_DIMENSIONS: "1536" + + # 3. 
sagemaker - for Sagemaker + OPENAI_API_KEY: +# OPENAI_API_BASE: + networks: + - nesis + depends_on: + - database + memcached: + image: bitnami/memcached:1.6.19 + ports: + - "11211:11211" + networks: + - nesis + samba: + image: andyzhangx/samba:win-fix + command: ["-u", "username;password", "-s", "share;/smbshare/;yes;no;no;all;none", "-p"] + ports: + - '2445:445' + networks: + - nesis + volumes: + - 'samba_data2:/smbshare' + environment: + - USERNAME=username + - PASSWORD=password + minio: + image: docker.io/bitnami/minio:2022 + ports: + - '59000:9000' + - '59001:9001' + networks: + - nesis + volumes: + - 'minio_data:/data' + environment: + - MINIO_ROOT_USER=your_username + - MINIO_ROOT_PASSWORD=your_password + - MINIO_DEFAULT_BUCKETS=documents + database: + image: "ametnes/postgresql:16-debian-12" + ports: + - "65432:5432" + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: password + POSTGRES_DB: nesis + volumes: + - database_data:/var/lib/postgresql/data + restart: on-failure + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 5s + timeout: 5s + retries: 5 + networks: + - nesis +volumes: + minio_data: + samba_data2: + database_data: + +``` + +## Steps + +1. Obtain your **OPENAI_API_KEY** from https://platform.openai.com/api-keys: + - and update the `compose.yml` file entry. + +2. Start all services locally with the provided docker compose file. + + ```commandline + docker-compose -f compose.yml up + ``` + +2. Then connect to your instance via http://localhost:58000 with the following login credentials: + - *email* = `some.email@domain.com` + - *password* = `password` + +3. Connect to your minio instance via http://localhost:59001/ with the following login credentials: + - *username* = `YOUR_USERNAME` + - *password* = `YOUR_PASSWORD` + + +4. Upload some documents into your minio `documents` bucket. + +5. Back on your Nesis page, register the minio datasource with + 1. Navigate to **Settings** -> **Datasource** -> **Add** + 2. 
Enter the details; + + 1. Type: **S3 Compatible** + 4. Name: **documents** + 5. Host: **http://minio:9000/** + 6. Username: `YOUR_USERNAME` + 7. Password: `YOUR_PASSWORD` + 8. Click **Create** + 9. Then, run an adhoc ingestion by clicking the **Ingest** button of the datasource. + +- *Note*: Replace `YOUR_USERNAME` and `YOUR_PASSWORD` with the correct values of your `username` and `password`. + diff --git a/docs/src/installing/helm.md b/docs/src/installing/helm.md new file mode 100644 index 0000000..1411bb8 --- /dev/null +++ b/docs/src/installing/helm.md @@ -0,0 +1,112 @@ +# Nesis Helm Chart + +For your production deployment, use the provided helm chart. Save the overrides values file on your local. + +## Installing to Kubernetes + +```yaml title="overrides.yml" linenums="1" +api: + env: + - name: NESIS_ADMIN_EMAIL + value: test@domain.com + - name: NESIS_ADMIN_PASSWORD + value: password +rag: + persistence: + enabled: true + size: 10Gi + config: null + extraEnv: + - name: OPENAI_API_KEY + value: + - name: HF_TOKEN + value: + # - name: OPENAI_API_BASE + # value: http:// +minio: + enabled: true + +``` + +Then add the helm repository with + +```commandline linenums="1" +helm repo add ametnes https://ametnes.github.io/helm +helm repo update +``` + +Lastly, install Nesis into your kubernetes cluster with + +```commandline linenums="1" +helm upgrade --install nesis ametnes/nesis -f /path/to/overrides.yml +``` + +Shortly after, you should see all services running as shown using `kubectl get po` below +```commandline +NAME READY STATUS RESTARTS AGE +nesis-api-664679c8f9-9vzhb 1/1 Running 0 45s +nesis-frontend-5f69fcb4d5-cpnd7 1/1 Running 0 45s +nesis-memcached-7d7855657d-zxd82 1/1 Running 0 45s +nesis-minio-6d458bc7-cpgql 1/1 Running 0 45s +nesis-postgresql-0 1/1 Running 0 45s +nesis-rag-757584f46c-9kqtf 1/1 Running 0 45s +``` + +???+ note "RAG Configuration" + + 1. 
You need to set the `OPENAI_API_KEY` and `OPENAI_API_BASE` environment variables before you can start chatting + with your documents. + 2. We noticed that some Huggingface embeddings models can only be used after authenticating with Huggingface. If you + encounter 401s during embeddings generation, you need to obtain a Huggingface token and populate the `HF_TOKEN` environment + variable. + +## Testing your Nesis + +1. Port forward services; + ```commandline title="Nesis Frontend" + kubectl port-forward svc/nesis-frontend 8000 + ``` + +2. Point your browser to http://localhost:8000 and login with `test@domain.com`:`password`. + +3. In another terminal; + ```commandline title="MinIO Frontend" + kubectl port-forward svc/nesis-minio 9001 + ``` + +4. Point your browser to http://localhost:9001 and login with `admin`:`password`. + +5. Upload documents into the MinIO bucket `private-documents`. +6. In the `Nesis Frontend` add a datasource with; + 7. Navigate to **Settings**->**Datasources**. + 8. Click **Add**. + 9. Enter + 10. **Type**: _MinIO (S3 Compatible)_ + 11. **Name**: _ds-private-documents_ + 11. **Endpoint**: _http://nesis-minio:9000_ + 11. **User**: _admin_ + 11. **Password**: _password_ + 11. **Dataobjects**: _private-documents_ + 12. Click **Save**. +13. In the datasource list, find the datasource you just created and click the _**Ingest**_ button. +14. View logs of your services using `kubetail` with + ```commandline + kt nesis + ``` +15. Once ingestion is complete, navigate to **Documents** and you can start chatting with your documents. + +## Overriding Key Dependencies + +The Nesis helm chart allows you to override the following components; + +1. Postgres database that backs the API component. +2. Memcached caching service. +3. Vector database. + + +???+ tip "Resource requirements" + + 1. The Frontend and API microservices are lightweight and don't need a lot of resources. + 2. The RAG Engine however needs to be scoped for enough memory, cpu and storage. 
+ 3. The Postgres database needs enough memory and storage because every ingested document is stored in it. + diff --git a/docs/src/quick-start.md b/docs/src/quick-start.md index dc024fa..ec99647 100644 --- a/docs/src/quick-start.md +++ b/docs/src/quick-start.md @@ -2,9 +2,12 @@ Nesis leverages the power of Generative AI to help enterprises search and discover information held in multiple repositories including; -1. MinIO Object Store -2. Windows Shares for your On-premise repositories. -3. And many more +1. SharePoint +2. AWS S3 Buckets +3. MinIO Object Store +4. Windows Shares for your on-premise repositories. +5. Google Drive +6. And many more Through the meticulous compilation and examination of your enterprise's data, Nesis harnesses the capabilities of Generative AI to create conversational engagement with the enterprise user. This allows the enterprise user to search through tons of documents in different formats @@ -54,7 +57,7 @@ Nesis allows the administrator to control who can perform certain actions within A role is defined by a policy defined in JSON and attached to the role. For example -```json +```json title="policy.json" linenums="1" { "items": [ { @@ -70,13 +73,14 @@ A role is defined by a policy defined in JSON and attached to the role. For exam ``` The above policy allows the role to + 1. Read from all datasources. 2. Create a prediction (all chats are predictions). For more precise control over who can access a given datasource, Nesis allows you to specify which datasources a given policy is allowed to access. Here is an example policy, -```json +```json title="policy.json" linenums="1" { "items": [ { diff --git a/docs/src/rbac.md b/docs/src/rbac.md index 09ded6b..530f4cb 100644 --- a/docs/src/rbac.md +++ b/docs/src/rbac.md @@ -26,7 +26,7 @@ sequenceDiagram ``` -### Authorization Sequence +### Authorization This sequence assumes a role has been created. As a use case, suppose the user is adding a datasource. 
The authentication sequence above would need to have been passed. @@ -56,14 +56,14 @@ sequenceDiagram ## Roles in Nesis ### Overview -A role is a named construct that combines a set of policies and attached to a user. The policy of the role +A role is a named construct that combines a set of policies and is attached to a user. The policy of the role indicates what actions that bearer (user) of that role is allowed to perform. A tabular description of the actors is below; -| Actor | Description | -|--------|---------------------------------------------------------------------------------------------------| -| User | The system user and bearer of the role | -| Policy | A set of rules that indicate that actions are permitted. By default all actions are not permitted | -| Role | A named object with a policy attached. A role is then assigned to a user. | +| Actor | Description | +|--------|----------------------------------------------------------------------------------------------------| +| User | The system user and bearer of the role. | +| Policy | A set of rules that indicate what actions are permitted. By default all actions are not permitted. | +| Role | A named object with a policy attached. A role is then assigned to a user. | !!! note @@ -78,18 +78,18 @@ For a user to perform any of these actions to any object within Nesis, they must The objects in Nesis that require policies to operate include. -| Object | Description | -|------------|-------------------------------------------------------------------------------| -| User | The system user | -| Role | A role created on the system and containing policies. | -| Datasource | A datasource that Nesis sources data from. | -| Task | A scheduled job that runs in the background such as datasource ingestion jobs | -| Prediction | Any user interaction with the rag engine is a prediction. 
| +| Object | Description | +|------------|--------------------------------------------------------------------------------| +| User | The system user. | +| Role | A role created on the system and containing policies. | +| Datasource | A datasource that Nesis sources data from. | +| Task | A scheduled job that runs in the background such as datasource ingestion jobs. | +| Prediction | Any user interaction with the rag engine is a prediction. | -Some actions require more than policy rule. For example to add a datasource that has a cron schedule, -the user role must permit CREATE:/datasource and CREATE:/tasks. +Some actions require more than one policy rule. For example to add a datasource that has a cron schedule, +the user role must permit `CREATE:/datasource` and `CREATE:/tasks`. -This fine-grained control enables you to be flexible in your role based access control. +This fine-grained control allows for greater flexibility when assigning permissions to users. A policy is simply a JSON document that is in the format @@ -115,9 +115,8 @@ Where `` can be one of the objects in the table above. A role must be created first before it can be attached to a user. When a role is created, policy rules must be assigned to the role. -You can attach a role to a user during creation of the user or after. Role policy enforcement is done -in real time and on every request to the API backend so any changes to the policy will be effective immediately -on the next request to the backend. +You can attach a role to a user during creation of the user or after. Role policies apply immediately and the next time +a request to the API backend is made, the policy will be enforced. #### Examples Here is a list of examples showing how roles can be applied within Nesis. 
diff --git a/nesis/api/core/launcher.py b/nesis/api/core/launcher.py deleted file mode 100644 index 3a69802..0000000 --- a/nesis/api/core/launcher.py +++ /dev/null @@ -1,45 +0,0 @@ -"""FastAPI app creation, logger configuration and main API routes.""" - -import logging - -from fastapi import Depends, FastAPI, Request -from fastapi.middleware.cors import CORSMiddleware -from injector import Injector - -from nesis.rag.core.server.chat.chat_router import chat_router -from nesis.rag.core.server.chunks.chunks_router import chunks_router -from nesis.rag.core.server.completions.completions_router import completions_router -from nesis.rag.core.server.health.health_router import health_router -from nesis.rag.core.server.ingest.ingest_router import ingest_router -from nesis.rag.core.settings.settings import Settings - -logger = logging.getLogger(__name__) - - -def create_app(root_injector: Injector) -> FastAPI: - - # Start the API - async def bind_injector_to_request(request: Request) -> None: - request.state.injector = root_injector - - app = FastAPI(dependencies=[Depends(bind_injector_to_request)]) - - app.include_router(completions_router) - app.include_router(chat_router) - app.include_router(chunks_router) - app.include_router(ingest_router) - app.include_router(health_router) - - settings = root_injector.get(Settings) - if settings.server.cors.enabled: - logger.debug("Setting up CORS middleware") - app.add_middleware( - CORSMiddleware, - allow_credentials=settings.server.cors.allow_credentials, - allow_origins=settings.server.cors.allow_origins, - allow_origin_regex=settings.server.cors.allow_origin_regex, - allow_methods=settings.server.cors.allow_methods, - allow_headers=settings.server.cors.allow_headers, - ) - - return app diff --git a/nesis/rag/Dockerfile b/nesis/rag/Dockerfile index 50d637d..e86ad8f 100644 --- a/nesis/rag/Dockerfile +++ b/nesis/rag/Dockerfile @@ -21,11 +21,16 @@ RUN if [ "$CORE" = "cuda" ] ; \ fi -ARG NESIS_VERSION + FROM 
python:3.11.6-slim-bookworm +ARG NESIS_VERSION +ARG UID=1001 +ARG GID=1002 +ARG UNAME=nesis RUN apt-get update \ && apt-get clean \ - && adduser --system --home /app --shell /bin/bash nesis \ + && addgroup --system --gid $GID $UNAME \ + && adduser --system --uid $UID --gid $GID --home /app --shell /bin/bash $UNAME \ && apt install ffmpeg tesseract-ocr -y WORKDIR /app @@ -35,13 +40,14 @@ COPY --chown=nesis nesis/rag nesis/rag COPY --chown=nesis nesis/__init__.py nesis/__init__.py ENV PATH="/app/.venv/bin/:$PATH" +ENV HOME=/app ARG PORT="8080" EXPOSE ${PORT} ENV PYTHONPATH="${PYTHONPATH}:/app/nesis" ENV NESIS_VERSION=$NESIS_VERSION -USER nesis +USER $UID CMD python nesis/rag/core/main.py -# Remember to set LLAMA_INDEX_CACHE_DIR \ No newline at end of file +# Remember to set LLAMA_INDEX_CACHE_DIR diff --git a/nesis/rag/requirements.txt b/nesis/rag/requirements.txt index def41f0..2e1a89d 100644 --- a/nesis/rag/requirements.txt +++ b/nesis/rag/requirements.txt @@ -49,5 +49,5 @@ unstructured.pytesseract==0.3.12 unstructured-inference==0.7.25 pillow_heif==0.16.0 -# This causes conflicts from onnxruntime so we attempt to install it last. Do not pin to a version so pip resolves it +# This causes conflicts from onnxruntime, so we attempt to install it last. Do not pin to a version so pip resolves it llama-index-embeddings-fastembed diff --git a/nesis/rag/settings.yaml b/nesis/rag/settings.yaml index dbb781a..bf457de 100644 --- a/nesis/rag/settings.yaml +++ b/nesis/rag/settings.yaml @@ -8,7 +8,7 @@ data: local_data_folder: ${NESIS_RAG_DATA_LOCAL_DATA_FOLDER:local_data/rag} llm: - mode: openai + mode: ${NESIS_RAG_LLM_MODE:openai} # Should be matching the selected model max_new_tokens: 512 context_window: 3900 @@ -17,7 +17,7 @@ llm: embedding: # Should be matching the value above in most cases mode: ${NESIS_RAG_EMBEDDING_MODE:local} - ingest_mode: simple + ingest_mode: ${NESIS_RAG_EMBEDDING_INGEST_MODE:simple} vectorstore: database: pgvector