# Training Bootc image builds #64
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
---
# Builds the training bootc images (NVIDIA builder, NVIDIA, Intel and AMD).
# The first job compiles Podman from source and publishes it as the
# `podman-bins` artifact; every later job installs that binary before building.
name: Training Bootc image builds

on:
  schedule:
    # Runs daily at 12:00 UTC (noon); GitHub evaluates cron expressions in UTC.
    - cron: '0 12 * * *'
  # pull_request:
  #   branches:
  #     - main
  #   paths:
  #     - .github/workflows/training_bootc.yaml
  #     - ./training/**
  # push:
  #   branches:
  #     - main
  #   paths:
  #     - .github/workflows/training_bootc.yaml
  #     - ./training/**
  workflow_dispatch:

# One run of this workflow at a time; a newer run queues instead of
# cancelling the run in progress.
concurrency:
  group: ${{ github.workflow }}
  cancel-in-progress: false

env:
  REGISTRY: quay.io
  REGISTRY_ORG: ai-lab

# NOTE(review): the login/push steps below are gated on
# `github.event_name == 'push'`, but the `push` trigger above is commented
# out, so with the current triggers they never run. They also read
# `steps.build_image.outputs.image|tags` from a plain `run:` step; unless the
# Makefile writes those to $GITHUB_OUTPUT they are empty. Confirm the intent
# before re-enabling pushes.
jobs:
  # Compiles Podman from source (or restores it from cache) and uploads ./bin.
  build-podman-v5:
    if: >-
      !contains(github.event.pull_request.labels.*.name, 'hold-tests')
      && github.repository == 'containers-mirror/ai-lab-recipes'
    env:
      CGO_ENABLED: '1'  # CGO is required for podman
      # Podman tag to build; also part of the binary cache key.
      PODMAN_VER: v5.1.1
      # Go toolchain for the build. Must satisfy the `go` directive of
      # podman's go.mod at PODMAN_VER - verify when bumping either value.
      GOVER: '1.22'
    # ubuntu-20.04 hosted runners have been retired. Binaries built on 22.04
    # still run on the 22.04 and 24.04 runners used by the consumer jobs.
    runs-on: ubuntu-22.04
    steps:
      - name: Cache podman bin
        id: cache-podman-bin
        uses: actions/cache@v4
        with:
          path: |
            ./bin
          key: ${{ runner.os }}-podman-${{ env.PODMAN_VER }}
          restore-keys: |
            ${{ runner.os }}-podman

      # Everything up to the upload is skipped on an exact cache hit.
      - uses: actions/checkout@v4.1.7
        if: steps.cache-podman-bin.outputs.cache-hit != 'true'
        with:
          repository: containers/podman
          ref: ${{ env.PODMAN_VER }}

      - uses: actions/setup-go@v5
        if: steps.cache-podman-bin.outputs.cache-hit != 'true'
        with:
          go-version: ${{ env.GOVER }}
          # Module/build caching is handled by the explicit step below.
          cache: false

      - name: Cache go modules
        if: steps.cache-podman-bin.outputs.cache-hit != 'true'
        uses: actions/cache@v4
        with:
          # In order:
          # * Module download cache
          # * Build cache (Linux)
          path: |
            ~/go/pkg/mod
            ~/.cache/go-build
          key: ${{ runner.os }}-go-podman-${{ hashFiles('**/go.sum') }}
          restore-keys: |
            ${{ runner.os }}-go-podman

      - name: Add build packages
        if: steps.cache-podman-bin.outputs.cache-hit != 'true'
        run: |
          sudo apt-get update
          sudo apt install -y libsystemd-dev libseccomp-dev pkg-config golang-github-proglottis-gpgme-dev

      - name: Build podman v5
        if: steps.cache-podman-bin.outputs.cache-hit != 'true'
        run: make binaries

      # store podman binary as artifact
      # (upload-artifact and download-artifact must stay on the same major)
      - uses: actions/upload-artifact@v4
        with:
          name: podman-bins
          path: bin

  # Builds the NVIDIA driver-toolkit (builder) image.
  nvidia-bootc-builder-image:
    if: >-
      !contains(github.event.pull_request.labels.*.name, 'hold-tests')
      && github.repository == 'containers-mirror/ai-lab-recipes'
    strategy:
      matrix:
        include:
          - image_name: nvidia-builder
            context: training/nvidia-bootc
            arch: amd64
    runs-on: ubuntu-24.04
    needs: build-podman-v5
    permissions:
      contents: read
      packages: write
    steps:
      - name: Remove unnecessary files
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"

      - uses: actions/checkout@v4.1.7

      - name: Install qemu dependency
        run: |
          sudo apt-get update
          sudo apt-get install -y qemu-user-static
          sudo apt-get install -y netavark containernetworking-plugins

      - name: pull in podman
        uses: actions/download-artifact@v4
        with:
          name: podman-bins
          path: bin

      # Swap the runner's podman for the freshly built one.
      - name: replace
        run: |
          chmod +x bin/podman
          sudo mv bin/podman /usr/bin/podman

      - name: generate a ssh key - USER SHOULD INJECT THEIR OWN AND REBUILD IF THEY USE THIS IMAGE
        run: |
          ssh-keygen -t rsa -b 4096 -f ~/.ssh/id_rsa -N ""

      - name: Build Image
        id: build_image
        run: make driver-toolkit ARCH=${{ matrix.arch }}
        working-directory: ${{ matrix.context }}

      - name: Login to Container Registry
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: redhat-actions/podman-login@v1.7
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ secrets.REGISTRY_USER }}
          password: ${{ secrets.REGISTRY_PASSWORD }}

      - name: Push image
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: redhat-actions/push-to-registry@v2.8
        with:
          image: ${{ steps.build_image.outputs.image }}
          tags: ${{ steps.build_image.outputs.tags }}
          registry: ${{ env.REGISTRY }}

      # Reported even when an earlier step failed.
      - name: Publish Job Results to Slack
        id: slack
        if: always()
        uses: slackapi/slack-github-action@v1.26.0
        with:
          payload: |
            {
              "text": "${{ github.workflow }} workflow status: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

  # Builds the NVIDIA bootc image once the builder image job has finished.
  nvidia-bootc-image:
    # The podman-bins artifact comes from build-podman-v5, which is reached
    # transitively through the builder job's own `needs`.
    needs: nvidia-bootc-builder-image
    if: >-
      !contains(github.event.pull_request.labels.*.name, 'hold-tests')
      && github.repository == 'containers-mirror/ai-lab-recipes'
    strategy:
      matrix:
        include:
          - image_name: nvidia-bootc
            driver_version: "550.54.15"
            context: training/nvidia-bootc
            arch: amd64
    runs-on: ubuntu-22.04-8-cores
    steps:
      - name: Remove unnecessary files
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"

      - uses: actions/checkout@v4.1.7

      - name: pull in podman
        uses: actions/download-artifact@v4
        with:
          name: podman-bins
          path: bin

      - name: replace
        run: |
          chmod +x bin/podman
          sudo mv bin/podman /usr/bin/podman

      - name: install packages
        run: |
          sudo apt-get update
          sudo apt-get install -y netavark containernetworking-plugins

      - name: Build Image
        id: build_image
        run: make bootc DRIVER_VERSION=${{ matrix.driver_version }} ARCH=${{ matrix.arch }}
        working-directory: ${{ matrix.context }}

      - name: Login to Container Registry
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: redhat-actions/podman-login@v1.7
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ secrets.REGISTRY_USER }}
          password: ${{ secrets.REGISTRY_PASSWORD }}

      - name: Push image
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: redhat-actions/push-to-registry@v2.8
        with:
          image: ${{ steps.build_image.outputs.image }}
          tags: ${{ steps.build_image.outputs.tags }}
          registry: ${{ env.REGISTRY }}

      - name: Publish Job Results to Slack
        id: slack
        if: always()
        uses: slackapi/slack-github-action@v1.26.0
        with:
          payload: |
            {
              "text": "${{ github.workflow }} workflow status: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

  # Builds the Intel and AMD bootc images (one matrix entry per GPU vendor).
  bootc-images:
    if: >-
      !contains(github.event.pull_request.labels.*.name, 'hold-tests')
      && github.repository == 'containers-mirror/ai-lab-recipes'
    strategy:
      matrix:
        include:
          - image_name: intel-bootc
            context: training/intel-bootc
            arch: amd64
            gpu: intel
            pull-images: quay.io/ai-lab/vllm:latest quay.io/ai-lab/deepspeed-trainer:latest
          - image_name: amd-bootc
            context: training/amd-bootc
            arch: amd64
            gpu: amd
            pull-images: quay.io/ai-lab/vllm:latest
    runs-on: ubuntu-22.04-8-cores
    needs: build-podman-v5
    # A failure of this job does not fail the workflow run.
    continue-on-error: true
    steps:
      - name: Remove unnecessary files
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"

      - uses: actions/checkout@v4.1.7

      - name: pull in podman
        uses: actions/download-artifact@v4
        with:
          name: podman-bins
          path: bin

      - name: replace
        run: |
          chmod +x bin/podman
          sudo mv bin/podman /usr/bin/podman

      - name: install packages
        run: |
          sudo apt-get update
          sudo apt-get install -y netavark containernetworking-plugins

      # Unlike the NVIDIA jobs, login is unconditional here because the
      # next step pulls images from the registry.
      - name: Login to Container Registry
        uses: redhat-actions/podman-login@v1.7
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ secrets.REGISTRY_USER }}
          password: ${{ secrets.REGISTRY_PASSWORD }}

      - name: pull images
        id: pull_image
        working-directory: ${{ matrix.context }}
        run: podman pull ${{ matrix.pull-images }}

      - name: generate the local OCI assets
        run: |
          cd training
          make -j vllm
          make -j deepspeed
          make -j instruct-${{ matrix.gpu }}

      - name: Build Image
        id: build_image
        run: make bootc ARCH=${{ matrix.arch }}
        working-directory: ${{ matrix.context }}

      - name: Push image
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: redhat-actions/push-to-registry@v2.8
        with:
          image: ${{ steps.build_image.outputs.image }}
          tags: ${{ steps.build_image.outputs.tags }}
          registry: ${{ env.REGISTRY }}

      - name: Publish Job Results to Slack
        id: slack
        if: always()
        uses: slackapi/slack-github-action@v1.26.0
        with:
          payload: |
            {
              "text": "${{ github.workflow }} workflow status: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}