Merge pull request #5 from backend-developers-ltd/return_hashes_from_http

return hashes of output files from http prompt answering
andreea-popescu-reef authored Dec 17, 2024
2 parents 6cf2bdc + caa6be2 commit fe539d9
Showing 18 changed files with 2,379 additions and 845 deletions.
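
The headline change — returning hashes of the output files from the HTTP prompt-answering flow — lives in source files that are not rendered in this diff. A rough sketch of the idea (the hash algorithm and helper name are assumptions, not taken from the commit):

```python
import hashlib
import pathlib
from typing import Dict


def hash_output_files(output_dir: pathlib.Path) -> Dict[str, str]:
    """Map each output file name to a hex digest of its contents."""
    return {
        path.name: hashlib.sha256(path.read_bytes()).hexdigest()
        for path in sorted(output_dir.iterdir())
        if path.is_file()
    }
```
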
43 changes: 43 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,43 @@
name: Run QA

on:
  push:
    branches: [master, main]
  pull_request:
    branches: [master, main]

env:
  PYTHON_DEFAULT_VERSION: "3.11"

jobs:
  test:
    timeout-minutes: 10
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
      - name: Set up Python ${{ env.PYTHON_DEFAULT_VERSION }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_DEFAULT_VERSION }}
          cache: "pip"
      - name: Install dependencies
        run: python -m pip install --upgrade 'pdm==2.19.3'
      - name: Setup virtualenv
        run: |
          pdm config venv.backend venv
          python -m venv .venv
echo "$.venv/bin" >> $GITHUB_PATH
echo "VIRTUAL_ENV=${{ github.workspace }}/.venv" >> $GITHUB_ENV
echo "PDM_IGNORE_SAVED_PYTHON=1" >> $GITHUB_ENV
- name: Install dependencies
run: pdm sync --group :all
- name: Install test dependencies
run: pdm sync --group test; pdm add pytest 'requests>=2.32.3,<3.0.0'
# for the life of me I don't understand why pdm refuses to
# install pytest in github actions
- name: list
run: pdm list
- name: Run unit tests
run: pdm run python -m pytest tests/integration_mock/
8 changes: 4 additions & 4 deletions Dockerfile
@@ -18,14 +18,14 @@ RUN mkdir /output

COPY pdm.lock pyproject.toml /app/

-# Copy your Python script into the container
-COPY src/compute_horde_prompt_solver /app/compute_horde_prompt_solver

RUN pdm install

COPY download_model.py /app/

RUN pdm run python download_model.py

+# Copy your Python script into the container
+COPY src/compute_horde_prompt_solver /app/compute_horde_prompt_solver

# Set the entrypoint to run your script
ENTRYPOINT ["pdm", "run", "python", "/app/compute_horde_prompt_solver/run.py", "--model=/app/saved_model/", "--output-dir=/output"]
ENTRYPOINT ["pdm", "run", "python", "-m", "compute_horde_prompt_solver.run", "--model=/app/saved_model/", "--output-dir=/output"]
14 changes: 10 additions & 4 deletions README.md
@@ -6,13 +6,18 @@ A tool for generating responses to prompts using vLLM, primarily designed for us

This project provides a script for generating responses to prompts using the vLLM library. It's designed to be flexible and can be run in various environments, including Docker containers and directly from Python.

There is a `--mock` flag for running smoke tests that validate the interface without actually downloading a model or having a GPU (a sketch of the idea follows below).
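
A deterministic mock presumably derives stable output from the prompt and the seed alone. This is an illustrative sketch only, not the commit's actual implementation (which lives in files not rendered in this diff):

```python
import hashlib

def mock_response(prompt: str, seed: int, length: int = 64) -> str:
    """Derive stable pseudo-random 'gibberish' from the prompt and the seed."""
    digest = hashlib.sha256(f"{seed}:{prompt}".encode()).hexdigest()
    # Repeat the digest until the requested length is reached.
    return (digest * (length // len(digest) + 1))[:length]
```
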

## Features

- Generate responses for multiple prompts
- Configurable model parameters (temperature, top-p, max tokens)
- Support for multiple input files
- Deterministic output with seed setting
- Docker support for easy deployment
- Can be started with a seed known up front, or as an HTTP server that waits to receive a seed and then calls the model; this server is designed to serve a single request and then be told to shut down (see the sketch below)
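
A minimal sketch of that server flow, tied to this commit's headline feature of returning output-file hashes. The route name, response shape, and helper are assumptions, not taken from the diff:

```python
import hashlib
import pathlib

from flask import Flask, jsonify, request

app = Flask(__name__)
OUTPUT_DIR = pathlib.Path("./output")


def solve_prompts(seed: int) -> None:
    """Stand-in for the real vLLM generation step."""
    OUTPUT_DIR.mkdir(exist_ok=True)
    (OUTPUT_DIR / "answers.json").write_text(f'{{"seed": {seed}}}')


@app.post("/execute-job")  # route name is a guess
def execute_job():
    solve_prompts(int(request.json["seed"]))
    # Respond with a hash per output file so the caller can verify them later.
    return jsonify(
        {
            path.name: hashlib.sha256(path.read_bytes()).hexdigest()
            for path in OUTPUT_DIR.iterdir()
            if path.is_file()
        }
    )
```
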

## Installation

@@ -22,6 +27,10 @@ The project uses `pdm` for dependency management. To install dependencies:
pdm install
```

## Testing

Tests in `integration_mock` are lightweight and can be run on any platform; the ones in `integration_real_llm` will only pass on an actual NVIDIA A6000.

## Usage

### Running with Docker
@@ -57,10 +66,7 @@ python run.py \
To download the model for use in a Docker image:

```bash
-python download_model.py \
-    --model-name "microsoft/Phi-3.5-mini-instruct" \
-    --model-revision "cd6881a82d62252f5a84593c61acf290f15d89e3" \
-    --model-path "./saved_model/"
+python download_model.py
```

## Parameters
1 change: 1 addition & 0 deletions download_model.py
@@ -27,6 +27,7 @@
snapshot_download(
    repo_id=args.model_name,
    local_dir=args.model_path,
+   revision=args.model_revision,
)

print(f"Model files downloaded to {args.model_path}")
1,159 changes: 525 additions & 634 deletions pdm.lock

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion pyproject.toml
@@ -8,14 +8,19 @@ authors = [
dependencies = [
    "huggingface-hub>=0.24.6",
    "deterministic-ml @ git+https://github.com/backend-developers-ltd/deterministic-ml.git@master",
-   "vllm>=0.6.0",
+   "vllm==0.6.0",
    "torch>=2.4.0",
    "setuptools>=74.1.2",
    "flask>=3.0.3",
]
requires-python = "==3.11.*"
readme = "README.md"
license = {text = "MIT"}

[dependency-groups]
test = [
    "pytest",
    "requests>=2.0.0,<3.0.0"
]


[tool.pdm]
7 changes: 7 additions & 0 deletions pytest.ini
@@ -0,0 +1,7 @@
[pytest]
python_files = tests.py test_*.py *_tests.py
filterwarnings =
    error
    default::DeprecationWarning
    default:Error when trying to teardown test databases
addopts = -s
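
For readers unfamiliar with pytest's `filterwarnings` syntax (`action:message:category`), the configuration above is roughly equivalent to this plain-Python setup: every warning escalates to an error, except the two listed patterns, which fall back to default reporting:

```python
import warnings

warnings.simplefilter("error")  # escalate every warning to an exception
warnings.filterwarnings("default", category=DeprecationWarning)
warnings.filterwarnings(
    "default", message="Error when trying to teardown test databases"
)
```
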
97 changes: 97 additions & 0 deletions src/compute_horde_prompt_solver/config.py
@@ -0,0 +1,97 @@
import argparse
import dataclasses
import pathlib
from typing import Optional, List


@dataclasses.dataclass
class Config:
    input_files: List[pathlib.Path]
    output_dir: pathlib.Path
    model: str
    max_tokens: int
    temperature: float
    top_p: float
    dtype: str
    seed: Optional[int]
    server: Optional[bool]
    server_port: int
    mock: bool


def parse_arguments() -> Config:
    parser = argparse.ArgumentParser(
        description="Generate responses for prompts using vLLM."
    )
    parser.add_argument(
        "input_files",
        nargs="+",
        type=pathlib.Path,
        help="Input files containing prompts",
    )
    parser.add_argument(
        "--output-dir",
        default="./output",
        type=pathlib.Path,
        help="Directory to save output files",
    )
    parser.add_argument(
        "--model", default="microsoft/Phi-3.5-mini-instruct", help="Model name or path"
    )
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=256,
        help="Maximum number of tokens to generate",
    )
    parser.add_argument(
        "--temperature", type=float, default=0, help="Sampling temperature"
    )
    parser.add_argument(
        "--top-p", type=float, default=0.1, help="Top-p sampling parameter"
    )
    parser.add_argument(
        "--dtype",
        default="auto",
        choices=("auto", "half", "float16", "bfloat16", "float", "float32"),
        help=(
            "model dtype - setting `float32` helps with deterministic prompts in different batches"
        ),
    )

    seed_or_server_group = parser.add_mutually_exclusive_group(required=True)
    seed_or_server_group.add_argument(
        "--seed", type=int, help="Random seed for reproducibility"
    )
    seed_or_server_group.add_argument(
        "--server",
        action="store_true",
        help="Spin up a temporary HTTP server to receive the seed",
    )

    parser.add_argument(
        "--server-port",
        type=int,
        default=8000,
        help="Port for temporary HTTP server",
    )
    parser.add_argument(
        "--mock",
        action="store_true",
        help="Don't use an actual model, generate random gibberish based on the input and the seed",
    )
    args = parser.parse_args()

    return Config(
        input_files=args.input_files,
        output_dir=args.output_dir,
        model=args.model,
        max_tokens=args.max_tokens,
        temperature=args.temperature,
        top_p=args.top_p,
        dtype=args.dtype,
        seed=args.seed,
        server=args.server,
        server_port=args.server_port,
        mock=args.mock,
    )
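
A quick illustration of how `parse_arguments` behaves with the mutually exclusive `--seed`/`--server` group (file names are placeholders):

```python
import sys

# Simulate a CLI invocation in --server mode; the seed will arrive over HTTP.
sys.argv = ["run.py", "prompts.txt", "--server", "--mock"]
config = parse_arguments()
assert config.server is True and config.seed is None
assert config.server_port == 8000  # default port
```
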