Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
Signed-off-by: sallyom <somalley@redhat.com>
  • Loading branch information
sallyom committed Apr 12, 2024
1 parent fe62a54 commit 6fbfd23
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 49 deletions.
45 changes: 35 additions & 10 deletions .github/workflows/model_servers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,27 @@ jobs:
- image_name: llamacpp_python
model: mistral
flavor: base
- image_name: llamacpp_python_vulkan
model: mistral
flavor: vulkan
directory: llamacpp_python
platforms: linux/amd64,linux/arm64
no_gpu: 1
#- image_name: llamacpp_python_vulkan
# model: mistral
# flavor: vulkan
# directory: llamacpp_python
# platforms: linux/arm64
# vulkan: 1
- image_name: llamacpp_python_cuda
model: mistral
flavor: cuda
directory: llamacpp_python
platforms: linux/amd64
cuda: 1
- image_name: whispercpp
model: whisper-small
flavor: base
directory: whispercpp
platforms: linux/amd64,linux/arm64
no_gpu: 1
runs-on: ubuntu-latest
permissions:
contents: read
Expand All @@ -46,6 +58,11 @@ jobs:
ports:
- 5000:5000
steps:
- name: Remove unnecessary files
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- uses: actions/checkout@v4.1.1

- name: Install qemu dependency
Expand All @@ -58,13 +75,13 @@ jobs:
uses: redhat-actions/buildah-build@v2.13
with:
image: ${{ env.REGISTRY }}/${{ github.repository_owner}}/${{ matrix.image_name }}
platforms: linux/amd64, linux/arm64
platforms: ${{ matrix.platforms }}
tags: latest
containerfiles: ./model_servers/${{ matrix.image_name }}/${{ matrix.flavor }}/Containerfile
context: model_servers/${{ matrix.image_name }}/
containerfiles: ./model_servers/${{ matrix.directory }}/${{ matrix.flavor }}/Containerfile
context: model_servers/${{ matrix.directory }}/

- name: Download model
working-directory: ./model_servers/${{ matrix.image_name }}/
working-directory: ./model_servers/${{ matrix.directory }}/
run: make ${{ matrix.model }}

- name: Set up Python
Expand All @@ -73,15 +90,23 @@ jobs:
python-version: '3.11'

- name: Install python dependencies
working-directory: ./model_servers/${{ matrix.image_name }}/
working-directory: ./model_servers/${{ matrix.directory }}/
run: make install

- name: Run tests
working-directory: ./model_servers/${{ matrix.image_name }}/
- name: Run non-gpu tests
working-directory: ./model_servers/${{ matrix.directory }}/
if: ${{ matrix.no_gpu }}
run: make test
env:
IMAGE_NAME: ${{ matrix.image_name }}

- name: Run cuda test
working-directory: ./model_servers/${{ matrix.directory }}/
if: ${{ matrix.cuda }}
run: make test-cuda
env:
IMAGE_NAME: ${{ matrix.image_name }}

- name: Login to Container Registry
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: redhat-actions/podman-login@v1.7
Expand Down
17 changes: 11 additions & 6 deletions .github/workflows/rag.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ on:
pull_request:
branches:
- main
# paths:
# - ./recipes/natural_language_processing/rag/**
# - .github/workflows/rag.yaml
# Only run for pull requests that touch the RAG recipe or this workflow file.
# Patterns are matched against repo-root-relative paths, so no leading "./".
paths:
  - recipes/natural_language_processing/rag/**
  - .github/workflows/rag.yaml
push:
branches:
- main
# paths:
# - ./recipes/natural_language_processing/rag/**
# - .github/workflows/rag.yaml
# Only run for pushes that touch the RAG recipe or this workflow file.
# Patterns are matched against repo-root-relative paths, so no leading "./".
paths:
  - recipes/natural_language_processing/rag/**
  - .github/workflows/rag.yaml

workflow_dispatch:

Expand All @@ -32,6 +32,11 @@ jobs:
ports:
- 5000:5000
steps:
- name: Remove unnecessary files
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- uses: actions/checkout@v4.1.1

- name: Install qemu dependency
Expand Down
36 changes: 8 additions & 28 deletions .github/workflows/testing-framework.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Testing Framework

on:
schedule: # schedule the job to run every hour
- cron: '0 */6 * * *'
- cron: '0 * * * *'

workflow_dispatch:

Expand Down Expand Up @@ -42,11 +42,6 @@ jobs:
- arch: amd64 # gpu enabled
aws_image_type: g4dn.xlarge
aws_ami_architecture: x86_64
- app_path: natural_language_processing/chatbot
- app_path: natural_language_processing/summarizer
- app_path: natural_language_processing/codegen
- app_path: natural_language_processing/rag
- app_path: audio/audio_to_text
steps:
- name: Checkout
uses: actions/checkout@v4.1.1
Expand Down Expand Up @@ -89,11 +84,11 @@ jobs:

- name: Ansible Collections
run: ansible-galaxy install -r ./provision/requirements.yml
working-directory: ./main/recipes/${{ matrix.app_path }}
working-directory: ./main/recipes/natural_language_processing/chatbot

- name: Provision
run: |
ansible-playbook ./main/recipes/${{ matrix.app_path }}/provision/playbook.yml \
ansible-playbook ./main/recipes/natural_language_processing/chatbot/provision/playbook.yml \
-i terraform-test-environment-module/hosts.ini \
--private-key=terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }}
env:
Expand All @@ -105,11 +100,11 @@ jobs:
python-version: '3.11'

- name: Install Dependencies
working-directory: ./main/recipes/${{ matrix.app_path }}
working-directory: ./main/recipes/natural_language_processing/chatbot
run: make install

- name: Run Integration Tests
working-directory: ./main/recipes/${{ matrix.app_path }}
working-directory: ./main/recipes/natural_language_processing/chatbot
run: make integration-tests
env:
URL: ${{ steps.terraform-output.outputs.url }}
Expand Down Expand Up @@ -144,14 +139,8 @@ jobs:
matrix:
include:
- image: llamacpp_python
- image: llamacpp_python_vulkan
- image: llamacpp_python_cuda
- image: whispercpp
- image: chatbot
- image: summarizer
- image: codegen
- image: rag
- image: transcribe
steps:
- name: Login to registry
uses: redhat-actions/podman-login@v1.7
Expand All @@ -178,23 +167,14 @@ jobs:
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

test-make-bootc:
test-make-targets:
if: github.repository == 'containers-mirror/ai-lab-recipes'
runs-on: ubuntu-22.04-2core
strategy:
fail-fast: false
matrix:
include:
- app_path: natural_language_processing/chatbot
- app_path: natural_language_processing/summarizer
- app_path: natural_language_processing/codegen
- app_path: natural_language_processing/rag
- app_path: audio/audio_to_text
steps:
- uses: actions/checkout@v4.1.1

- name:
working-directory: ./recipes/${{ matrix.app_path }}
- name: chatbot
working-directory: ./recipes/natural_language_processing/chatbot
run: make bootc

- name: Publish Job Results to Slack
Expand Down
20 changes: 16 additions & 4 deletions model_servers/llamacpp_python/Makefile
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
APP := llamacpp_python
IMAGE_BASE := llamacpp-python
PORT := 8001

IMAGE := quay.io/ai-lab/$(APP):latest
CUDA_IMAGE := quay.io/ai-lab/$(APP)_cuda:latest
VULKAN_IMAGE := quay.io/ai-lab/$(APP)_vulkan:latest
IMAGE := quay.io/ai-lab/$(IMAGE_BASE):latest
CUDA_IMAGE := quay.io/ai-lab/$(IMAGE_BASE)-cuda:latest
VULKAN_IMAGE := quay.io/ai-lab/$(IMAGE_BASE)-vulkan:latest

# ----- MODEL OPTIONS -----

Expand Down Expand Up @@ -43,7 +44,7 @@ build-cuda:

.PHONY: build-vulkan
build-vulkan:
podman build --squash-all -t $(VULKAN_IMAGE) . -f cuda/Containerfile
podman build --squash-all -t $(VULKAN_IMAGE) . -f vulkan/Containerfile

.PHONY: download-model-tiny-llama
download-model-tiny-llama:
Expand All @@ -67,6 +68,17 @@ run:
cd ../../models && \
podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) $(IMAGE)

# TODO: Add tests for llamacpp-cuda
# Launch the CUDA image detached (-d), requesting the nvidia.com/gpu=all
# device and bind-mounting the selected model at $(MODELS_PATH)/model.gguf.
# The trailing `|| true` deliberately swallows any failure of the command,
# so this target never fails; it is a placeholder for a future test.
.PHONY: run-cuda
run-cuda:
	cd ../../models && \
	podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host --device nvidia.com/gpu=all $(CUDA_IMAGE) || true

# TODO: Add tests for llamacpp-cuda
# This rule has no recipe of its own; it only runs its run-cuda prerequisite.
.PHONY: test-cuda
test-cuda: run-cuda

.PHONY: test
test:
curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
Expand Down
1 change: 1 addition & 0 deletions model_servers/llamacpp_python/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest_container
import os

# TODO: for CUDA, pass extra_launch_args=["--device", "nvidia.com/gpu=all"] to the Container below.
MS = pytest_container.Container(
url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
volume_mounts=[
Expand Down
1 change: 0 additions & 1 deletion model_servers/llamacpp_python/tests/test_alive.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

CONTAINER_IMAGES = [MS]


def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
assert auto_container.connection.file("/etc/os-release").exists

Expand Down

0 comments on commit 6fbfd23

Please sign in to comment.