From d950e28cea22bf6764482cf3aa4251ac2d2f34d4 Mon Sep 17 00:00:00 2001 From: erhant Date: Thu, 29 Aug 2024 15:58:42 +0300 Subject: [PATCH 1/6] simplify repo and docs --- Cargo.toml | 1 + Makefile | 27 ++-- README.md | 294 ++------------------------------------ docs/NODE_GUIDE.md | 246 +++++++++++++++++++++++++++++++ docs/NODE_PERFORMANCE.md | 51 +++++++ docs/NODE_SPECS.md | 73 ++++++++++ src/config/mod.rs | 9 +- src/config/ollama.rs | 73 ++++++++++ src/p2p/data_transform.rs | 4 +- tests/ollama_test.rs | 84 ----------- 10 files changed, 481 insertions(+), 381 deletions(-) create mode 100644 docs/NODE_GUIDE.md create mode 100644 docs/NODE_PERFORMANCE.md create mode 100644 docs/NODE_SPECS.md delete mode 100644 tests/ollama_test.rs diff --git a/Cargo.toml b/Cargo.toml index dc64d5e..83624fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ inherits = "release" debug = true [features] +# used by flamegraphs & instruments profiling = [] [dependencies] diff --git a/Makefile b/Makefile index 516a18d..618bf31 100644 --- a/Makefile +++ b/Makefile @@ -5,33 +5,33 @@ ifneq (,$(wildcard ./.env)) endif ############################################################################### -.PHONY: launch # | Run with INFO log-level in release mode +.PHONY: launch # | Run with INFO logs in release mode launch: - RUST_LOG=warn,dkn_compute=info cargo run --release + RUST_LOG=none,dkn_compute=info cargo run --release -.PHONY: run # | Run with INFO log-level +.PHONY: run # | Run with INFO logs run: RUST_LOG=none,dkn_compute=info cargo run -.PHONY: debug # | Run with DEBUG log-level with INFO log-level workflows +.PHONY: debug # | Run with DEBUG logs with INFO log-level workflows debug: RUST_LOG=warn,dkn_compute=debug,ollama_workflows=info cargo run -.PHONY: trace # | Run with crate-level TRACE logging +.PHONY: trace # | Run with TRACE logs trace: - RUST_LOG=none,dkn_compute=trace,libp2p=debug cargo run + RUST_LOG=warn,dkn_compute=trace,libp2p=debug cargo run .PHONY: build # | Build build: cargo build -.PHONY: profile-cpu # | Profile CPU usage with flamegraph +.PHONY: profile-cpu # | Profile CPU usage with flamegraph profile-cpu: cargo flamegraph --root --profile=profiling --features=profiling -.PHONY: profile-mem # | Profile memory usage with instruments +.PHONY: profile-mem # | Profile memory usage with instruments profile-mem: - cargo instruments --profile=profiling --features=profiling -t Leaks + cargo instruments --profile=profiling --features=profiling -t Allocations .PHONY: version # | Print version version: @@ -42,17 +42,12 @@ version: test: cargo test -############################################################################### -.PHONY: prompt # | Run a single prompt on a model -prompt: - cargo run --example prompt - ############################################################################### -.PHONY: lint # | Run clippy +.PHONY: lint # | Run linter (clippy) lint: cargo clippy -.PHONY: format # | Run formatter +.PHONY: format # | Run formatter (cargo fmt) format: cargo fmt -v diff --git a/README.md b/README.md index 58c11f4..baf6b92 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ ## About -A **Dria Compute Node** is a unit of computation within the Dria Knowledge Network. It's purpose is to process tasks given by the **Dria Admin Node**. To get started, [setup](#setup) your envrionment and then see [usage](#usage) to run the node. +A **Dria Compute Node** is a unit of computation within the Dria Knowledge Network. It's purpose is to process tasks given by the **Dria Admin Node**. 
To get started, see [node guide](./docs/NODE_GUIDE.md)! ### Tasks @@ -38,258 +38,23 @@ Compute nodes can technically do any arbitrary task, from computing the square r - **Workflows**: Each task is given in the form of a workflow, based on [Ollama Workflows](https://github.com/andthattoo/ollama-workflows) (see repository for more information). In simple terms, each workflow defines the agentic behavior of an LLM, all captured in a single JSON file, and can represent things ranging from simple LLM generations to iterative web searching. -## Requirements +## Node Running -### Software - -You need the following applications to run compute node: - -- **Git**: We will use `git` to clone the repository from GitHub, and pull latest changes for updates later. -- **Docker**: Our services will make use of Docker so that the node can run on any machine. - -> [!TIP] -> -> You can check if you have these via: -> -> ```sh -> which git -> which docker -> ``` - -### Hardware - -**For overall specifications about required CPU and RAM, please refer to [dkn-node-specs](https://github.com/firstbatchxyz/dkn-node-specs).** - -In general, if you are using Ollama you will need the memory to run large models locally, which depend on the model's size that you are willing to. If you are in a memory-constrained environment, you can opt to use OpenAI models instead. - -> [!NOTE] -> -> The compute node is a lightweight process, but you may see increased memory & CPU usage during the initial testing phases, due to various protocol-level operations with the growing network size. - -## Setup - -To be able to run a node, we need to make a few simple preparations. Follow the steps below one by one. - -### 1. Clone the repository - -This repository has the necessary setup to run the node, so start by cloning it using the command below: - -```bash -git clone https://github.com/firstbatchxyz/dkn-compute-node -cd dkn-compute-node -``` - -### 2. Prepare Environment Variables - -Dria Compute Node makes use of several environment variables. Create a `.env` file, and copy the environment variables as given in [.env.example](./.env.example). We will fill out the missing parts in a moment. - -```sh -cp .env.example .env -``` - -> [!NOTE] -> -> `DKN_ADMIN_PUBLIC_KEY` is used to verify that the tasks are given by certain nodes, so that your node does not work for tasks given to the network by untrusted people. You don't need to change this, simply copy and paste it to your `.env`. - -> [!TIP] -> -> While adding anything to your `.env`, you can do it without leaving the terminal. For example, suppose you want to set `VALUE` to some `KEY`, you can do it as: -> -> ```sh -> echo "KEY=VALUE" >> .env -> ``` -> -> If you would like to view the `.env` without leaving the terminal, you can do: -> -> ```sh -> cat .env -> ``` - -### 3. Prepare Ethereum Wallet - -Dria makes use of the same Ethereum wallet, that is the recipient of your hard-earned rewards! Place your private key at `DKN_WALLET_SECRET_KEY` in `.env` without the 0x prefix. It should look something like: - -```sh -DKN_WALLET_SECRET_KEY=ac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80 -``` - -> [!CAUTION] -> -> Always make sure your private key is within the .gitignore'd `.env` file, nowhere else! To be even safer, you can use a throwaway wallet, you can always transfer your rewards to a main wallet afterwards. - -### 4. Setup LLM Provider - -For the final step, we need to make sure we can serve LLM requests. 
- -#### For OpenAI - -If you will be using OpenAI to serve its models, you need to have an API key in the environment. Simply set the key within your `.env`: - -```sh -OPENAI_API_KEY= -``` - -#### For Ollama - -Of course, first you have to install Ollama; see their [download page](https://ollama.com/download). Then, you must **first pull a small embedding model that is used internally**. - -```sh -ollama pull hellord/mxbai-embed-large-v1:f16 -``` - -For the models that you choose (see list of models just below [here](#1-choose-models)) you can download them with same command. Note that if your model size is large, pulling them may take a while. - -```sh -# example for phi3:3.8b -ollama pull phi3:3.8b -``` - -> [!TIP] -> -> Alternatively, you can set `OLLAMA_AUTO_PULL=true` in the `.env` so that the compute node will always download the missing models for you. - -#### Optional Services - -Based on presence of API keys, [Ollama Workflows](https://github.com/andthattoo/ollama-workflows/) may use more superior services instead of free alternatives, e.g. [Serper](https://serper.dev/) instead of [DuckDuckGo](https://duckduckgo.com/) or [Jina](https://jina.ai/) without rate-limit instead of with rate-limit. Add these within your `.env` as: - -```sh -SERPER_API_KEY= -JINA_API_KEY= -``` - -## Usage - -With all setup steps above completed, we are ready to start a node! - -### 1. Choose Model(s) - -Based on the resources of your machine, you must decide which models that you will be running locally. For example, you can use OpenAI with their models, not running anything locally at all; or you can use Ollama with several models loaded to disk, and only one loaded to memory during its respective task. Available models (see [here](https://github.com/andthattoo/ollama-workflows/blob/main/src/program/atomics.rs#L269) for latest) are: - -#### Ollama Models - -- `adrienbrault/nous-hermes2theta-llama3-8b:q8_0` -- `phi3:14b-medium-4k-instruct-q4_1` -- `phi3:14b-medium-128k-instruct-q4_1` -- `phi3:3.8b` -- `llama3.1:latest` -- `phi3.5:3.8b` -- `phi3.5:3.8b-mini-instruct-fp16` - -#### OpenAI Models - -- `gpt-3.5-turbo` -- `gpt-4-turbo` -- `gpt-4o` -- `gpt-4o-mini` - -> [!TIP] -> -> If you are using Ollama, make sure you have pulled the required models, as specified in the [section above](#4-setup-ollama-for-ollama-users)! - -### 2. Start Docker - -Our node will be running within a Docker container, so we should make sure that Docker is running before the next step. You can launch Docker via its [desktop application](https://www.docker.com/products/docker-desktop/), or a command such as: - -```sh -sudo systemctl start docker -``` - -> [!NOTE] -> -> You don't need to do this step if Docker is already running in the background. - -### 3. Run Node - -It's time to run our compute node. We have a starter script that makes this much easier, you can see available commadns with: - -See the available commands with: - -```sh -chmod +x start.sh -./start.sh --help -``` - -Simply run the script with the model names provided, such as: - -```sh -./start.sh -m=llama3.1:latest -m=gpt-3.5-turbo -``` - -Start script will run the containers in the background. You can check their logs either via the terminal or from [Docker Desktop](https://www.docker.com/products/docker-desktop/). - -#### Running in Debug Mode - -To print DEBUG-level logs for the compute node, you can add `--dev` argument to the start script. 
For example: - -```sh -./start.sh -m=gpt-4o-mini --dev -``` - -Running in debug mode will also allow you to see behind the scenes of Ollama Workflows, i.e. you can see the reasoning of the LLM as it executes the task. - -> Similarly, you can run in trace mode with `--trace` to see trace logs, which cover low-level logs from the p2p client. - -### 4. Looking at Logs - -To see your logs, you can go to [Docker Desktop](https://www.docker.com/products/docker-desktop/) and see the running containers and find `dkn-compute-node`. There, open the containers within the compose (click on `>` to the left) and click on any of the container to see its logs. - -Alternatively, you can use `docker compose logs` such as below: - -```sh -docker compose logs -f compute # compute node logs -docker compose logs -f ollama # ollama logs -``` - -The `-f` option is so that you can track the logs from terminal. If you prefer to simply check the latest logs, you can use a command such as: - -```sh -# logs from last 1 hour -docker compose logs --since=1h compute - -# logs from last 30 minutes -docker compose logs --since=30m compute -``` - -### 5. Stopping the Node - -When you start your node with `./start.sh`, it will wait for you in the same terminal to do CTRL+C before stopping. Once you do that, the containers will be stopped and removed. You can also kill the containers manually, doing CTRL+C afterwards will do nothing in such a case. - -> [!NOTE] -> -> Sometimes it may not immediately exit whilst executing a task, if you REALLY need to quite the process you can kill it manually. - -### Using Ollama - -> If you don't have Ollama installed, you can ignore this section. - -If you have Ollama installed already (e.g. via `brew install ollama`) then you must indicate that you will be using that Ollama, instead of a Docker container. To do this, we set the provide the argument `--local-ollama=true` which is `true` by default. With this, the compute node will use the Ollama server on your machine, instead of a Docker container. - -If the Ollama server is not running, the start script will initiate it with `ollama serve` and terminate it when the node is being stopped. - -- If `--local-ollama=false` or the local Ollama server is reachable, the compute node will use a Docker Compose service for it. - -> [!TIP] -> -> There are three Docker Compose Ollama options: `ollama-cpu`, `ollama-cuda`, and `ollama-rocm`. The start script will decide which option to use based on the host machine's GPU specifications. - -```sh -# Run with local ollama -./start.sh -m=phi3 --local-ollama=true -``` - -### Additional Static Nodes - -You can add additional relay nodes & bootstrap nodes from environment, using the `DKN_RELAY_NODES` and `DKN_BOOTSTRAP_NODES` variables respectively. Simply write the `Multiaddr` string of the static nodes as comma-separated values, and the compute node will pick them up at the start. +Refer to [node guide](./docs/NODE_GUIDE.md) to quickly get started and run your own node! ## Releases -We have 3 types of releases: +For _production_ images: - **Versioned**: With each release, a versioned image is deployed on Docker hub with the version tag `:vX.X.X`. -- **Latest**: As usual, the latest version is kept under `:latest` tag. -- **Development**: On each push to `master`, a new image is created with `:unstable`. +- **Latest**: The latest production image is always under the `:latest` tag. + +For _development_ images: -See deployed images on [Docker Hub](https://hub.docker.com/orgs/firstbatch/members). 
+- **Master**: On each push to `master` branch, a new image is created with the tag `master--`. +- **Unstable**: The latest development image is always under the `:unstable` tag. + +You can see the list of deployed images on [Docker Hub](https://hub.docker.com/orgs/firstbatch/members). ## Development @@ -308,30 +73,19 @@ make run # info-level logs make debug # debug-level logs ``` -### Testing & Benchmarking +### Testing You can the tests as follows: ```sh -make test # unit tests -make test-ollama # Ollama tests (requires a running Ollama client) +make test ``` -To measure the speed of some Ollama models we have a benchmark that uses some models for a few prompts: - -```sh -cargo run --release --example ollama -``` - -You can also benchmark these models using a larger task list at a given path, with the following command: - -```sh -JSON_PATH="./path/to/your.json" cargo run --release --example ollama -``` +We also have some benchmarking and profiling scripts, see [node performance](./docs/NODE_PERFORMANCE.md) for more details. ### Documentation -Open crate docs using: +You can view the inline documentation with: ```sh make docs @@ -346,24 +100,6 @@ make lint # clippy make format # rustfmt ``` -### Profiling - -We would like to profile both CPU and Memory usage. - -To create a [flamegraph](https://crates.io/crates/flamegraph) of the application, do: - -```sh -make profile-cpu -``` - -This will create a profiling build that inherits `release` mode, except with debug information. - -To profile memory usage, we make use of [cargo-instruments](https://crates.io/crates/cargo-instruments). - -> [!NOTE] -> -> CPU profiling may require super-user access. - ## License This project is licensed under the [Apache License 2.0](https://opensource.org/license/Apache-2.0). diff --git a/docs/NODE_GUIDE.md b/docs/NODE_GUIDE.md new file mode 100644 index 0000000..4eca1f8 --- /dev/null +++ b/docs/NODE_GUIDE.md @@ -0,0 +1,246 @@ +## Node Running + +Running a Dria Compute Node is pretty straightforward. + +## Requirements + +### Software + +You need the following applications to run compute node: + +- **Git**: We will use `git` to clone the repository from GitHub, and pull latest changes for updates later. +- **Docker**: Our services will make use of Docker so that the node can run on any machine. + +> [!TIP] +> +> You can check if you have these via: +> +> ```sh +> which git +> which docker +> ``` + +### Hardware + +**To learn about hardware specifications such as required CPU and RAM, please refer to [node specifications](./docs/NODE_SPECS.md).** + +In general, if you are using Ollama you will need the memory to run large models locally, which depend on the model's size that you are willing to. If you are in a memory-constrained environment, you can opt to use OpenAI models instead. + +> [!NOTE] +> +> The compute node is a lightweight process, but you may see increased memory & CPU usage during the initial testing phases, due to various protocol-level operations with the growing network size. + +## Setup + +To be able to run a node, we need to make a few simple preparations. Follow the steps below one by one. + +### 1. Clone the repository + +This repository has the necessary setup to run the node, so start by cloning it using the command below: + +```bash +git clone https://github.com/firstbatchxyz/dkn-compute-node +cd dkn-compute-node +``` + +### 2. Prepare Environment Variables + +Dria Compute Node makes use of several environment variables. 
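For reference, a fully prepared `.env` ends up looking roughly like the sketch below. Every value except `DKN_ADMIN_PUBLIC_KEY` is an illustrative placeholder, and each variable is explained in the steps that follow.

```sh
# wallet & admin key (see the wallet step below)
DKN_WALLET_SECRET_KEY=<your-private-key-without-0x-prefix>
DKN_ADMIN_PUBLIC_KEY=0208ef5e65a9c656a6f92fb2c770d5d5e2ecffe02a6aade19207f75110be6ae658

# models to serve, comma-separated (e.g. one Ollama model and one OpenAI model)
DKN_MODELS=phi3:3.8b,gpt-4o-mini

# LLM providers (see the provider step below)
OPENAI_API_KEY=<your-openai-key-if-using-openai>
OLLAMA_AUTO_PULL=true

# optional services used by Ollama Workflows
SERPER_API_KEY=
JINA_API_KEY=
```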
Create a `.env` file, and copy the environment variables as given in [.env.example](./.env.example). We will fill out the missing parts in a moment. + +```sh +cp .env.example .env +``` + +> [!NOTE] +> +> `DKN_ADMIN_PUBLIC_KEY` is used to verify that the tasks are given by certain nodes, so that your node does not work for tasks given to the network by untrusted people. You don't need to change this, simply copy and paste it to your `.env`. + +> [!TIP] +> +> While adding anything to your `.env`, you can do it without leaving the terminal. For example, suppose you want to set `VALUE` to some `KEY`, you can do it as: +> +> ```sh +> echo "KEY=VALUE" >> .env +> ``` +> +> If you would like to view the `.env` without leaving the terminal, you can do: +> +> ```sh +> cat .env +> ``` + +### 3. Prepare Ethereum Wallet + +Dria makes use of the same Ethereum wallet, that is the recipient of your hard-earned rewards! Place your private key at `DKN_WALLET_SECRET_KEY` in `.env` without the 0x prefix. It should look something like: + +```sh +DKN_WALLET_SECRET_KEY=ac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80 +``` + +> [!CAUTION] +> +> Always make sure your private key is within the .gitignore'd `.env` file, nowhere else! To be even safer, you can use a throwaway wallet, you can always transfer your rewards to a main wallet afterwards. + +### 4. Setup LLM Provider + +For the final step, we need to make sure we can serve LLM requests. + +#### For OpenAI + +If you will be using OpenAI to serve its models, you need to have an API key in the environment. Simply set the key within your `.env`: + +```sh +OPENAI_API_KEY= +``` + +#### For Ollama + +Of course, first you have to install Ollama; see their [download page](https://ollama.com/download). Then, you must **first pull a small embedding model that is used internally**. + +```sh +ollama pull hellord/mxbai-embed-large-v1:f16 +``` + +For the models that you choose (see list of models just below [here](#1-choose-models)) you can download them with same command. Note that if your model size is large, pulling them may take a while. + +```sh +# example for phi3:3.8b +ollama pull phi3:3.8b +``` + +> [!TIP] +> +> Alternatively, you can set `OLLAMA_AUTO_PULL=true` in the `.env` so that the compute node will always download the missing models for you. + +#### Optional Services + +Based on presence of API keys, [Ollama Workflows](https://github.com/andthattoo/ollama-workflows/) may use more superior services instead of free alternatives, e.g. [Serper](https://serper.dev/) instead of [DuckDuckGo](https://duckduckgo.com/) or [Jina](https://jina.ai/) without rate-limit instead of with rate-limit. Add these within your `.env` as: + +```sh +SERPER_API_KEY= +JINA_API_KEY= +``` + +## Usage + +With all setup steps above completed, we are ready to start a node! + +### 1. Choose Model(s) + +Based on the resources of your machine, you must decide which models that you will be running locally. For example, you can use OpenAI with their models, not running anything locally at all; or you can use Ollama with several models loaded to disk, and only one loaded to memory during its respective task. 
Available models (see [here](https://github.com/andthattoo/ollama-workflows/blob/main/src/program/atomics.rs#L269) for latest) are: + +#### Ollama Models + +- `adrienbrault/nous-hermes2theta-llama3-8b:q8_0` +- `phi3:14b-medium-4k-instruct-q4_1` +- `phi3:14b-medium-128k-instruct-q4_1` +- `phi3:3.8b` +- `llama3.1:latest` +- `phi3.5:3.8b` +- `phi3.5:3.8b-mini-instruct-fp16` + +#### OpenAI Models + +- `gpt-3.5-turbo` +- `gpt-4-turbo` +- `gpt-4o` +- `gpt-4o-mini` + +> [!TIP] +> +> If you are using Ollama, make sure you have pulled the required models, as specified in the [section above](#4-setup-ollama-for-ollama-users)! + +### 2. Start Docker + +Our node will be running within a Docker container, so we should make sure that Docker is running before the next step. You can launch Docker via its [desktop application](https://www.docker.com/products/docker-desktop/), or a command such as: + +```sh +sudo systemctl start docker +``` + +> [!NOTE] +> +> You don't need to do this step if Docker is already running in the background. + +### 3. Run Node + +It's time to run our compute node. We have a starter script that makes this much easier, you can see available commadns with: + +See the available commands with: + +```sh +chmod +x start.sh +./start.sh --help +``` + +Simply run the script with the model names provided, such as: + +```sh +./start.sh -m=llama3.1:latest -m=gpt-3.5-turbo +``` + +Start script will run the containers in the background. You can check their logs either via the terminal or from [Docker Desktop](https://www.docker.com/products/docker-desktop/). + +#### Running in Debug Mode + +To print DEBUG-level logs for the compute node, you can add `--dev` argument to the start script. For example: + +```sh +./start.sh -m=gpt-4o-mini --dev +``` + +Running in debug mode will also allow you to see behind the scenes of Ollama Workflows, i.e. you can see the reasoning of the LLM as it executes the task. + +> Similarly, you can run in trace mode with `--trace` to see trace logs, which cover low-level logs from the p2p client. + +### 4. Looking at Logs + +To see your logs, you can go to [Docker Desktop](https://www.docker.com/products/docker-desktop/) and see the running containers and find `dkn-compute-node`. There, open the containers within the compose (click on `>` to the left) and click on any of the container to see its logs. + +Alternatively, you can use `docker compose logs` such as below: + +```sh +docker compose logs -f compute # compute node logs +docker compose logs -f ollama # ollama logs +``` + +The `-f` option is so that you can track the logs from terminal. If you prefer to simply check the latest logs, you can use a command such as: + +```sh +# logs from last 1 hour +docker compose logs --since=1h compute + +# logs from last 30 minutes +docker compose logs --since=30m compute +``` + +### 5. Stopping the Node + +When you start your node with `./start.sh`, it will wait for you in the same terminal to do CTRL+C before stopping. Once you do that, the containers will be stopped and removed. You can also kill the containers manually, doing CTRL+C afterwards will do nothing in such a case. + +> [!NOTE] +> +> Sometimes it may not immediately exit whilst executing a task, if you REALLY need to quite the process you can kill it manually. + +### Using Ollama + +> If you don't have Ollama installed, you can ignore this section. + +If you have Ollama installed already (e.g. via `brew install ollama`) then you must indicate that you will be using that Ollama, instead of a Docker container. 
To do this, we set the provide the argument `--local-ollama=true` which is `true` by default. With this, the compute node will use the Ollama server on your machine, instead of a Docker container. + +If the Ollama server is not running, the start script will initiate it with `ollama serve` and terminate it when the node is being stopped. + +- If `--local-ollama=false` or the local Ollama server is reachable, the compute node will use a Docker Compose service for it. + +> [!TIP] +> +> There are three Docker Compose Ollama options: `ollama-cpu`, `ollama-cuda`, and `ollama-rocm`. The start script will decide which option to use based on the host machine's GPU specifications. + +```sh +# Run with local ollama +./start.sh -m=phi3 --local-ollama=true +``` + +### Additional Static Nodes + +You can add additional relay nodes & bootstrap nodes from environment, using the `DKN_RELAY_NODES` and `DKN_BOOTSTRAP_NODES` variables respectively. Simply write the `Multiaddr` string of the static nodes as comma-separated values, and the compute node will pick them up at the start. diff --git a/docs/NODE_PERFORMANCE.md b/docs/NODE_PERFORMANCE.md new file mode 100644 index 0000000..a7927c0 --- /dev/null +++ b/docs/NODE_PERFORMANCE.md @@ -0,0 +1,51 @@ +# Node Performance + +We have some benchmarks to see model performance using Ollama, and some profiling scripts to check CPU and memory usage. + +## Benchmarking + +You can the tests as follows: + +```sh +make test # unit tests +``` + +To measure the speed of some Ollama models we have a benchmark that uses some models for a few prompts: + +```sh +cargo run --release --example ollama +``` + +You can also benchmark these models using a larger task list at a given path, with the following command: + +```sh +JSON_PATH="./path/to/your.json" cargo run --release --example ollama +``` + +## Profiling + +We have scripts to profile both CPU and Memory usage. A special build is created for profiling, via a custom `profiling` feature, such that the output inherits `release` mode but also has debug symbols. + +Furthermore, the profiling build will exit automatically after a certain time, as if CTRL+C has been pressed. This is needed by the memory profiling tool in particular. + +### CPU Profiling + +To create a [flamegraph](https://crates.io/crates/flamegraph) of the application, do: + +```sh +make profile-cpu +``` + +This will create a profiling build that inherits `release` mode, except with debug information. + +> [!NOTE] +> +> CPU profiling may require super-user access. + +### Memory Profiling + +To profile memory usage, we make use of [cargo-instruments](https://crates.io/crates/cargo-instruments). + +```sh +make profile-mem +``` diff --git a/docs/NODE_SPECS.md b/docs/NODE_SPECS.md new file mode 100644 index 0000000..ecfc6fc --- /dev/null +++ b/docs/NODE_SPECS.md @@ -0,0 +1,73 @@ +# 🚀 LLM Node Runner's Guide: Minimum Specs + +Hello, Drians! 👋 Here's a guide to help you understand the minimum specs needed for running different LLMs. We've broken it down into two main categories: (1) **GPU-enabled** nodes and (2) **CPU-only** nodes, as you can run your nodes on machines both _with_ or _without_ GPU. + +- ## 🖥️ GPU-Enabled Nodes + +These specs are based on a system with 16 CPUs and 64GB RAM. 
+ +| Model | GPU Memory | CPU Usage (cores) | RAM Usage | +| -------------- | -------------- | ----------------- | ------------ | +| Llama3_1_8B | 6.1 - 6.2 GB | 8.6 - 12.8 cores | 8.5 GB | +| Phi3Mini | 3.3 - 3.4 GB | 14.4 - 22.5 cores | 7.7 GB | +| Phi3Medium128k | 10.9 - 11.0 GB | 7.9 - 11.4 cores | 5.3 GB | +| Phi3Medium | 10.9 - 11.0 GB | 4.3 - 5.7 cores | 5.3 GB | +| NousTheta | 9.6 GB | 4.1 - 4.8 cores | 6.4 - 6.6 GB | + +- ## 💻 CPU-Only Nodes + +For those running without a GPU, we've got you covered too! Here are the specs for different CPU types: + +### ARM (4 CPU, 16GB RAM) + +| Model | CPU Usage (cores) | RAM Usage | +| -------------- | ----------------- | ------------- | +| NousTheta | 3.0 - 3.5 cores | 9.6 GB | +| Phi3Medium | 3.7 - 3.8 cores | 10.4 GB | +| Phi3Medium128k | 3.7 - 3.8 cores | 10.4 GB | +| Phi3Mini | 3.2 - 6.1 cores | 5.6 - 11.4 GB | +| Llama3_1_8B | 3.4 - 3.7 cores | 6.1 GB | + +### ARM (8 CPU, 16GB RAM) + +| Model | CPU Usage (cores) | RAM Usage | +| -------------- | ----------------- | ------------- | +| NousTheta | 6.2 - 6.3 cores | 9.6 GB | +| Phi3Medium | 6.5 cores | 10.8 GB | +| Phi3Medium128k | 6.5 cores | 10.8 GB | +| Phi3Mini | 5.4 - 7.0 cores | 5.8 - 11.6 GB | +| Llama3_1_8B | 3.4 - 4.2 cores | 6.2 GB | + +### AMD (8 CPU, 16GB RAM) + +| Model | CPU Usage (cores) | RAM Usage | +| -------------- | ----------------- | ------------- | +| NousTheta | 2.3 - 3.2 cores | 9.5 GB | +| Phi3Medium | 3.3 - 3.4 cores | 10.3 GB | +| Phi3Medium128k | 1.6 - 3.2 cores | 10.2 GB | +| Phi3Mini | 2.8 - 3.1 cores | 5.4 - 11.4 GB | +| Llama3_1_8B | 4.5 - 4.6 cores | 11.1 GB | + +### Intel (8 CPU, 16GB RAM) + +| Model | CPU Usage (cores) | RAM Usage | +| -------------- | ----------------- | ------------- | +| NousTheta | 2.3 - 2.9 cores | 9.7 GB | +| Phi3Medium | 3.1 - 3.3 cores | 10.4 GB | +| Phi3Medium128k | 2.2 - 3.3 cores | 10.3 GB | +| Phi3Mini | 2.6 - 4.1 cores | 5.4 - 11.0 GB | +| Llama3_1_8B | 3.7 - 3.9 cores | 11.3 GB | + +## 📝 Notes + +- CPU usage can vary significantly between tasks, especially for long context vs. multiple steps. + +- Some models may require more than the available CPU cores, which could lead to slower performance. + +- RAM usage is generally consistent but can spike for certain operations. + +- **Important**: For systems with 4 CPUs and 8GB RAM, only Phi3Mini was able to run successfully.\*\* + +- **Important**: Lower CPU count results in lower performance. Systems with fewer CPUs will process requests more slowly, especially for models that require more CPU resources than are available. + +Remember, these are minimum specs, and your experience may vary depending on the specific tasks and workload. Happy node running! 
🎉 diff --git a/src/config/mod.rs b/src/config/mod.rs index d765ea6..75c8339 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -43,7 +43,14 @@ impl DriaComputeNodeConfig { Ok(secret_env) => { let secret_dec = hex::decode(secret_env.trim_start_matches("0x")) .expect("Secret key should be 32-bytes hex encoded."); - SecretKey::parse_slice(&secret_dec).expect("Secret key should be parseable.") + + // if secret key is all-zeros, create one randomly + // this is useful for testing & creating nodes on the fly + if secret_dec.iter().all(|b| b == &0) { + SecretKey::random(&mut rand::thread_rng()) + } else { + SecretKey::parse_slice(&secret_dec).expect("Secret key should be parseable.") + } } Err(err) => { log::error!("No secret key provided: {}", err); diff --git a/src/config/ollama.rs b/src/config/ollama.rs index a63560c..1031d45 100644 --- a/src/config/ollama.rs +++ b/src/config/ollama.rs @@ -222,3 +222,76 @@ impl OllamaConfig { false } } + +#[cfg(test)] +mod tests { + use ollama_workflows::ollama_rs::{generation::completion::request::GenerationRequest, Ollama}; + use ollama_workflows::{Executor, Model, ProgramMemory, Workflow}; + + #[tokio::test] + #[ignore = "run this manually"] + async fn test_ollama_prompt() { + let model = Model::Phi3Mini.to_string(); + let ollama = Ollama::default(); + ollama.pull_model(model.clone(), false).await.unwrap(); + let prompt = "The sky appears blue during the day because of a process called scattering. \ + When sunlight enters the Earth's atmosphere, it collides with air molecules such as oxygen and nitrogen. \ + These collisions cause some of the light to be absorbed or reflected, which makes the colors we see appear more vivid and vibrant. \ + Blue is one of the brightest colors that is scattered the most by the atmosphere, making it visible to our eyes during the day. 
\ + What may be the question this answer?".to_string(); + + let response = ollama + .generate(GenerationRequest::new(model, prompt.clone())) + .await + .expect("Should generate response"); + println!("Prompt: {}\n\nResponse:{}", prompt, response.response); + } + + #[tokio::test] + #[ignore = "run this manually"] + async fn test_ollama_workflow() { + let workflow = r#"{ + "name": "Simple", + "description": "This is a simple workflow", + "config": { + "max_steps": 5, + "max_time": 100, + }, + "tasks":[ + { + "id": "A", + "name": "Random Poem", + "description": "Writes a poem about Kapadokya.", + "prompt": "Please write a poem about Kapadokya.", + "operator": "generation", + "outputs": [ + { + "type": "write", + "key": "final_result", + "value": "__result" + } + ] + }, + { + "id": "__end", + "name": "end", + "description": "End of the task", + "prompt": "End of the task", + "operator": "end", + } + ], + "steps":[ + { + "source":"A", + "target":"end" + } + ] + }"#; + let workflow: Workflow = serde_json::from_str(workflow).unwrap(); + let exe = Executor::new(Model::Phi3Mini); + let mut memory = ProgramMemory::new(); + + let result = exe.execute(None, workflow, &mut memory).await; + println!("Result: {}", result); + } +} diff --git a/src/p2p/data_transform.rs b/src/p2p/data_transform.rs index 6b63908..f5becac 100644 --- a/src/p2p/data_transform.rs +++ b/src/p2p/data_transform.rs @@ -72,6 +72,8 @@ impl DataTransform for TTLDataTransform { mod tests { use std::time::Duration; + use libp2p::PeerId; + use super::*; #[test] @@ -88,7 +90,7 @@ mod tests { // inbound transform let raw_message = RawMessage { - source: Default::default(), + source: Some(PeerId::random()), data: transformed_data, sequence_number: None, topic, diff --git a/tests/ollama_test.rs b/tests/ollama_test.rs deleted file mode 100644 index d084c69..0000000 --- a/tests/ollama_test.rs +++ /dev/null @@ -1,84 +0,0 @@ -#![allow(unused_imports)] - -use ollama_workflows::ollama_rs::{generation::completion::request::GenerationRequest, Ollama}; -use ollama_workflows::{Entry, Executor, Model, ProgramMemory, Workflow}; -use std::env; -use tokio_util::sync::CancellationToken; - -#[tokio::test] -#[ignore = "run this manually"] -async fn test_ollama_prompt() { - let model = "orca-mini".to_string(); - let ollama = Ollama::default(); - ollama.pull_model(model.clone(), false).await.unwrap(); - let prompt = "The sky appears blue during the day because of a process called scattering. \ - When sunlight enters the Earth's atmosphere, it collides with air molecules such as oxygen and nitrogen. \ - These collisions cause some of the light to be absorbed or reflected, which makes the colors we see appear more vivid and vibrant. \ - Blue is one of the brightest colors that is scattered the most by the atmosphere, making it visible to our eyes during the day. \ - What may be the question this answer?".to_string(); - - let response = ollama - .generate(GenerationRequest::new(model, prompt.clone())) - .await - .expect("Should generate response"); - println!("Prompt: {}\n\nResponse:{}", prompt, response.response); -} - -#[tokio::test] -#[ignore = "run this manually"] -async fn test_ollama_bad_model() { - let model = "thismodeldoesnotexistlol".to_string(); - let ollama = Ollama::default(); - assert!( - ollama.pull_model(model, false).await.is_err(), - "Should give error due to non-existing model." 
- ); -} - -#[tokio::test] -#[ignore = "run this manually"] -async fn test_ollama_workflow() { - let workflow = r#"{ - "name": "Simple", - "description": "This is a simple workflow", - "config": { - "max_steps": 5, - "max_time": 100, - }, - "tasks":[ - { - "id": "A", - "name": "Random Poem", - "description": "Writes a poem about Kapadokya.", - "prompt": "Please write a poem about Kapadokya.", - "operator": "generation", - "outputs": [ - { - "type": "write", - "key": "final_result", - "value": "__result" - } - ] - }, - { - "id": "__end", - "name": "end", - "description": "End of the task", - "prompt": "End of the task", - "operator": "end", - } - ], - "steps":[ - { - "source":"A", - "target":"end" - } - ] -}"#; - let workflow: Workflow = serde_json::from_str(workflow).unwrap(); - let exe = Executor::new(Model::Phi3Mini); - let mut memory = ProgramMemory::new(); - - let result = exe.execute(None, workflow, &mut memory).await; - println!("Result: {}", result); -} From 13af03e18d8f08e14e9bd5b84db48bd7938427eb Mon Sep 17 00:00:00 2001 From: erhant Date: Thu, 29 Aug 2024 19:59:09 +0300 Subject: [PATCH 2/6] print peers every interval [skip ci] --- src/p2p/client.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/p2p/client.rs b/src/p2p/client.rs index c798cf1..e270d90 100644 --- a/src/p2p/client.rs +++ b/src/p2p/client.rs @@ -340,10 +340,13 @@ impl P2PClient { let num_peers = gossipsub.all_peers().count(); let num_mesh_peers = gossipsub.all_mesh_peers().count(); - // print peers if the count has changed + // print peer counts + log::info!("Peer Count (mesh/all): {} / {}", num_mesh_peers, num_peers); + + // print peers themselves if the count has changed if num_peers != self.peer_count.0 || num_mesh_peers != self.peer_count.1 { self.peer_count = (num_peers, num_mesh_peers); - log::info!("Peer Count (mesh/all): {} / {}", num_mesh_peers, num_peers); + log::debug!( "All Peers:\n{}", gossipsub From bcabd2e74bf8d88916f88b2497e34d61b68dfa3e Mon Sep 17 00:00:00 2001 From: selimseker Date: Thu, 29 Aug 2024 20:42:40 +0300 Subject: [PATCH 3/6] add phi3.5 to startsh [skip ci] --- start.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/start.sh b/start.sh index 9243a40..9331825 100755 --- a/start.sh +++ b/start.sh @@ -182,7 +182,7 @@ handle_ollama_env() { # if there is no ollama model given, do not add any ollama compose profile ollama_needed=false - ollama_models="nous-hermes2theta-llama3-8b phi3:medium phi3:medium-128k phi3:3.8b llama3.1:latest" + ollama_models="nous-hermes2theta-llama3-8b phi3:medium phi3:medium-128k phi3:3.8b phi3.5 llama3.1:latest" for m in $(echo "$DKN_MODELS" | tr ',' ' '); do case " $ollama_models " in *" $m "*) ollama_needed=true; break;; From 6c940747f614046bc77fd44b2dba0e2128359e6a Mon Sep 17 00:00:00 2001 From: erhant Date: Thu, 29 Aug 2024 22:41:55 +0300 Subject: [PATCH 4/6] fix linux docker network mode [skip ci] --- start.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/start.sh b/start.sh index 9331825..babea15 100755 --- a/start.sh +++ b/start.sh @@ -297,7 +297,12 @@ handle_ollama_env() { # use docker internal for the Ollama host OLLAMA_HOST=$DOCKER_HOST - DKN_DOCKER_NETWORK_MODE=bridge + + # if the OS is Linux, use host network mode + # otherwise bridge is fine, as set above + if [ "$OS" = "Linux" ]; then + DKN_DOCKER_NETWORK_MODE=host + fi fi echo "Ollama host: $OLLAMA_HOST (network mode: $DKN_DOCKER_NETWORK_MODE)" From aac2150ba26cadd11212f6a03ee6243c6c2ce960 Mon Sep 17 00:00:00 2001 From: erhant 
Date: Fri, 30 Aug 2024 13:19:27 +0300 Subject: [PATCH 5/6] rm default model in `.env.example` [skip ci] --- .env.example | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 66421f3..1f34c69 100644 --- a/.env.example +++ b/.env.example @@ -6,7 +6,8 @@ DKN_WALLET_SECRET_KEY= # You don't need to change this, simply copy and paste it. DKN_ADMIN_PUBLIC_KEY=0208ef5e65a9c656a6f92fb2c770d5d5e2ecffe02a6aade19207f75110be6ae658 # model1,model2,model3,... (comma separated, case-insensitive) -DKN_MODELS=phi3:3.8b +# example: phi3:3.8b,gpt-4o-mini +DKN_MODELS= ## DRIA (optional) ## # P2P address, you don't need to change this unless you really want this port. From 62384378c697e683d31ff8e8c6d7491e04e3a088 Mon Sep 17 00:00:00 2001 From: erhant Date: Mon, 2 Sep 2024 15:30:44 +0300 Subject: [PATCH 6/6] updated workflows with error handling, added model --- Cargo.lock | 4 ++-- Cargo.toml | 4 ++-- docs/NODE_GUIDE.md | 1 + src/config/ollama.rs | 15 +++++++++------ src/handlers/workflow.rs | 20 +++++++++++--------- start.sh | 2 +- 6 files changed, 26 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2c1baab..300395f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1035,7 +1035,7 @@ dependencies = [ [[package]] name = "dkn-compute" -version = "0.1.6" +version = "0.1.7" dependencies = [ "async-trait", "base64 0.22.1", @@ -3458,7 +3458,7 @@ dependencies = [ [[package]] name = "ollama-workflows" version = "0.1.0" -source = "git+https://github.com/andthattoo/ollama-workflows?rev=d6b2e1e#d6b2e1e0259958bd4abe301f5c6c79b7c07b1dc2" +source = "git+https://github.com/andthattoo/ollama-workflows?rev=ba038f7#ba038f7f2f16199b4710b81af1bf230d9a158873" dependencies = [ "async-trait", "colored", diff --git a/Cargo.toml b/Cargo.toml index 83624fc..5a22708 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "dkn-compute" -version = "0.1.6" +version = "0.1.7" edition = "2021" license = "Apache-2.0" readme = "README.md" @@ -46,7 +46,7 @@ sha3 = "0.10.8" fastbloom-rs = "0.5.9" # workflows -ollama-workflows = { git = "https://github.com/andthattoo/ollama-workflows", rev = "d6b2e1e" } +ollama-workflows = { git = "https://github.com/andthattoo/ollama-workflows", rev = "ba038f7" } # peer-to-peer libp2p = { git = "https://github.com/anilaltuner/rust-libp2p.git", rev = "3c55e95", features = [ diff --git a/docs/NODE_GUIDE.md b/docs/NODE_GUIDE.md index 4eca1f8..9547525 100644 --- a/docs/NODE_GUIDE.md +++ b/docs/NODE_GUIDE.md @@ -136,6 +136,7 @@ Based on the resources of your machine, you must decide which models that you wi - `phi3:14b-medium-128k-instruct-q4_1` - `phi3:3.8b` - `llama3.1:latest` +- `llama3.1:8b-instruct-q8_0` - `phi3.5:3.8b` - `phi3.5:3.8b-mini-instruct-fp16` diff --git a/src/config/ollama.rs b/src/config/ollama.rs index 1031d45..343a107 100644 --- a/src/config/ollama.rs +++ b/src/config/ollama.rs @@ -210,11 +210,14 @@ impl OllamaConfig { log::warn!("Ignoring model {}: Workflow timed out", model); }, result = executor.execute(None, workflow, &mut memory) => { - if result.is_empty() { - log::warn!("Ignoring model {}: Workflow returned empty result", model); - } else { - log::info!("Accepting model {}", model); - return true; + match result { + Ok(_) => { + log::info!("Accepting model {}", model); + return true; + } + Err(e) => { + log::warn!("Ignoring model {}: Workflow failed with error {}", model, e); + } } } }; @@ -292,6 +295,6 @@ mod tests { let mut memory = ProgramMemory::new(); let result = exe.execute(None, workflow, &mut 
memory).await; - println!("Result: {}", result); + println!("Result: {}", result.unwrap()); } } diff --git a/src/handlers/workflow.rs b/src/handlers/workflow.rs index 2ac735f..26555af 100644 --- a/src/handlers/workflow.rs +++ b/src/handlers/workflow.rs @@ -76,7 +76,7 @@ impl ComputeHandler for WorkflowHandler { .get_any_matching_model(task.input.model)?; log::info!("Using model {} for task {}", model, task.task_id); - // execute workflow with cancellation + // prepare workflow executor let executor = if model_provider == ModelProvider::Ollama { Executor::new_at( model, @@ -91,26 +91,28 @@ impl ComputeHandler for WorkflowHandler { .input .prompt .map(|prompt| Entry::try_value_or_str(&prompt)); - let result: Option; + + // execute workflow with cancellation + let result: String; tokio::select! { _ = node.cancellation.cancelled() => { log::info!("Received cancellation, quitting all tasks."); return Ok(MessageAcceptance::Accept) }, exec_result = executor.execute(entry.as_ref(), task.input.workflow, &mut memory) => { - if exec_result.is_empty() { - return Err(format!("Got empty string result for task {}", task.task_id).into()); - } else { - result = Some(exec_result); + match exec_result { + Ok(exec_result) => { + result = exec_result; + } + Err(e) => { + return Err(format!("Workflow failed with error {}", e).into()); + } } } } - let result = result.ok_or::(format!("No result for task {}", task.task_id))?; // publish the result node.send_result(result_topic, &task.public_key, &task.task_id, result)?; - - // accept message, someone else may be included in the filter Ok(MessageAcceptance::Accept) } } diff --git a/start.sh b/start.sh index babea15..ecdcb48 100755 --- a/start.sh +++ b/start.sh @@ -182,7 +182,7 @@ handle_ollama_env() { # if there is no ollama model given, do not add any ollama compose profile ollama_needed=false - ollama_models="nous-hermes2theta-llama3-8b phi3:medium phi3:medium-128k phi3:3.8b phi3.5 llama3.1:latest" + ollama_models="nous-hermes2theta-llama3-8b phi3:medium phi3:medium-128k phi3:3.8b phi3.5 llama3.1:latest llama3.1:8b-instruct-q8_0" for m in $(echo "$DKN_MODELS" | tr ',' ' '); do case " $ollama_models " in *" $m "*) ollama_needed=true; break;;
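For clarity, here is a self-contained sketch of the model-detection logic that `start.sh` applies in the hunk above: it walks the comma-separated `DKN_MODELS` list and flags whether any entry is an Ollama-served model, which in turn decides whether an Ollama compose profile is needed. The `DKN_MODELS` value used here is only an example.

```sh
#!/bin/sh
# Illustrative sketch of the ollama_needed check in start.sh.
DKN_MODELS="phi3.5,gpt-4o-mini"   # example value, not taken from the patch

ollama_needed=false
ollama_models="nous-hermes2theta-llama3-8b phi3:medium phi3:medium-128k phi3:3.8b phi3.5 llama3.1:latest llama3.1:8b-instruct-q8_0"
for m in $(echo "$DKN_MODELS" | tr ',' ' '); do
    case " $ollama_models " in
        *" $m "*) ollama_needed=true; break;;
    esac
done

echo "ollama needed: $ollama_needed"  # prints "true", since phi3.5 is an Ollama model
```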