moving tests from mirror to regular #67

Workflow file for this run

.github/workflows/training_bootc.yaml at f0a6072

	# Builds the training bootc images on an on-demand, self-hosted EC2 runner.
	# Job flow: start-runner -> image build jobs -> stop-runner.
	name: Training Bootc image builds

	# Manual trigger only.
	on:
	  workflow_dispatch:

	# All runs of this workflow share one concurrency group; a run that is
	# already in progress is not cancelled by a newer one.
	concurrency:
	  group: ${{ github.workflow }}
	  cancel-in-progress: false

	env:
	  REGISTRY: quay.io
	  REGISTRY_ORG: ai-lab
	  # AWS region used by both the start-runner and stop-runner jobs.
	  REGION: us-east-1

	jobs:
	  # Launches the EC2 instance and exposes its runner label and instance id
	  # as job outputs for the jobs below.
	  start-runner:
	    name: Start self-hosted EC2 runner
	    if: "!contains(github.event.pull_request.labels.*.name, 'hold-tests')"
	    runs-on: ubuntu-latest
	    outputs:
	      label: ${{ steps.start-ec2-runner.outputs.label }}
	      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
	    steps:
	      - name: Configure AWS credentials
	        uses: aws-actions/configure-aws-credentials@v1
	        with:
	          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
	          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
	          aws-region: ${{ env.REGION }}
	      - name: Start EC2 runner
	        id: start-ec2-runner
	        uses: machulav/ec2-github-runner@v2
	        with:
	          mode: start
	          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
	          ec2-image-id: ami-04cec38d48a5be576
	          ec2-instance-type: m7i.8xlarge
	          subnet-id: subnet-0b1e1d94240813658
	          security-group-id: sg-055105753f5e8bd83

	  # Runs `make driver-toolkit` in training/nvidia-bootc on the EC2 runner.
	  nvidia-bootc-builder-image:
	    if: "!contains(github.event.pull_request.labels.*.name, 'hold-tests')"
	    needs: start-runner
	    runs-on: ${{ needs.start-runner.outputs.label }}
	    strategy:
	      matrix:
	        include:
	          - image_name: nvidia-builder
	            context: training/nvidia-bootc
	            arch: amd64
	    permissions:
	      contents: read
	      packages: write
	    steps:
	      - uses: actions/checkout@v4.1.7

	      - name: mkdir root/.docker directory
	        run: |
	          mkdir -p ~/.docker

	      # Credentials are passed through the environment and stdin so the
	      # password is neither expanded unquoted by the shell nor placed on
	      # the podman command line.
	      - name: Login to Container Registry
	        env:
	          REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
	          REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
	        run: |
	          printf '%s' "$REGISTRY_PASSWORD" |
	            podman login -u "$REGISTRY_USER" --password-stdin "$REGISTRY"

	      - name: generate a ssh key - USER SHOULD INJECT THEIR OWN AND REBUILD IF THEY USE THIS IMAGE
	        run: |
	          ssh-keygen -t rsa -b 4096 -f ~/.ssh/id_rsa -N ""

	      - name: Build Image
	        id: build_image
	        run: make driver-toolkit ARCH=${{ matrix.arch }}
	        working-directory: ${{ matrix.context }}

	      # The only trigger declared above is workflow_dispatch, so this
	      # condition is never true and the step is skipped.
	      # NOTE(review): no step visible in this file writes the `image` and
	      # `tags` outputs of build_image - confirm the Makefile does before
	      # enabling pushes.
	      - name: Push image
	        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
	        uses: redhat-actions/push-to-registry@v2.8
	        with:
	          image: ${{ steps.build_image.outputs.image }}
	          tags: ${{ steps.build_image.outputs.tags }}
	          registry: ${{ env.REGISTRY }}

	      # always() makes the job status get reported even after a failed step.
	      - name: Publish Job Results to Slack
	        id: slack
	        if: always()
	        uses: slackapi/slack-github-action@v1.26.0
	        with:
	          payload: |
	            {
	            "text": "${{ github.workflow }} workflow status: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
	            }
	        env:
	          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

	  # Runs `make bootc` in training/nvidia-bootc once the builder job is done.
	  nvidia-bootc-image:
	    if: "!contains(github.event.pull_request.labels.*.name, 'hold-tests')"
	    # start-runner must be a direct dependency: the `needs` context only
	    # exposes outputs of jobs listed here, and runs-on reads its label.
	    needs:
	      - start-runner
	      - nvidia-bootc-builder-image
	    runs-on: ${{ needs.start-runner.outputs.label }}
	    strategy:
	      matrix:
	        include:
	          - image_name: nvidia-bootc
	            driver_version: "550.54.15"
	            context: training/nvidia-bootc
	            arch: amd64
	    steps:
	      - uses: actions/checkout@v4.1.7

	      - name: mkdir root/.docker directory
	        run: |
	          mkdir -p ~/.docker

	      # Credentials go through the environment and stdin, not the command line.
	      - name: Login to Container Registry
	        env:
	          REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
	          REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
	        run: |
	          printf '%s' "$REGISTRY_PASSWORD" |
	            podman login -u "$REGISTRY_USER" --password-stdin "$REGISTRY"

	      - name: Build Image
	        id: build_image
	        run: make bootc DRIVER_VERSION=${{ matrix.driver_version }} ARCH=${{ matrix.arch }}
	        working-directory: ${{ matrix.context }}

	      # Skipped under the workflow_dispatch-only trigger declared above.
	      - name: Push image
	        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
	        uses: redhat-actions/push-to-registry@v2.8
	        with:
	          image: ${{ steps.build_image.outputs.image }}
	          tags: ${{ steps.build_image.outputs.tags }}
	          registry: ${{ env.REGISTRY }}

	      - name: Publish Job Results to Slack
	        id: slack
	        if: always()
	        uses: slackapi/slack-github-action@v1.26.0
	        with:
	          payload: |
	            {
	            "text": "${{ github.workflow }} workflow status: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
	            }
	        env:
	          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

	  # Intel and AMD bootc images, one matrix entry each.
	  bootc-images:
	    if: "!contains(github.event.pull_request.labels.*.name, 'hold-tests')"
	    needs: start-runner
	    runs-on: ${{ needs.start-runner.outputs.label }}
	    # A failure in this job does not fail the workflow run.
	    continue-on-error: true
	    strategy:
	      matrix:
	        include:
	          - image_name: intel-bootc
	            context: training/intel-bootc
	            arch: amd64
	            gpu: intel
	            pull-images: quay.io/ai-lab/vllm:latest quay.io/ai-lab/deepspeed-trainer:latest
	          - image_name: amd-bootc
	            context: training/amd-bootc
	            arch: amd64
	            gpu: amd
	            pull-images: quay.io/ai-lab/vllm:latest
	    steps:
	      - uses: actions/checkout@v4.1.7

	      - name: mkdir root/.docker directory
	        run: |
	          mkdir -p ~/.docker

	      # Credentials go through the environment and stdin, not the command line.
	      - name: Login to Container Registry
	        env:
	          REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
	          REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
	        run: |
	          printf '%s' "$REGISTRY_PASSWORD" |
	            podman login -u "$REGISTRY_USER" --password-stdin "$REGISTRY"

	      # pull-images is a space-separated list, expanded into separate
	      # arguments of a single `podman pull`.
	      - name: pull images
	        id: pull_image
	        working-directory: ${{ matrix.context }}
	        run: podman pull ${{ matrix.pull-images }}

	      - name: generate the local OCI assets
	        run: |
	          cd training
	          make -j vllm
	          make -j deepspeed
	          make -j instruct-${{ matrix.gpu }}

	      - name: Build Image
	        id: build_image
	        run: make bootc ARCH=${{ matrix.arch }}
	        working-directory: ${{ matrix.context }}

	      # Skipped under the workflow_dispatch-only trigger declared above.
	      - name: Push image
	        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
	        uses: redhat-actions/push-to-registry@v2.8
	        with:
	          image: ${{ steps.build_image.outputs.image }}
	          tags: ${{ steps.build_image.outputs.tags }}
	          registry: ${{ env.REGISTRY }}

	      - name: Publish Job Results to Slack
	        id: slack
	        if: always()
	        uses: slackapi/slack-github-action@v1.26.0
	        with:
	          payload: |
	            {
	            "text": "${{ github.workflow }} workflow status: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
	            }
	        env:
	          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

	  # Terminates the EC2 instance started by start-runner.
	  stop-runner:
	    name: Stop self-hosted EC2 runner
	    needs:
	      - start-runner # required to get output from the start-runner job
	      # every job that runs on the EC2 runner must finish before it is stopped
	      - nvidia-bootc-builder-image
	      - nvidia-bootc-image
	      - bootc-images
	    runs-on: ubuntu-latest
	    if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
	    steps:
	      - name: Configure AWS credentials
	        uses: aws-actions/configure-aws-credentials@v1
	        with:
	          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
	          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
	          # Same region the instance was started in by start-runner.
	          aws-region: ${{ env.REGION }}
	      - name: Stop EC2 runner
	        uses: machulav/ec2-github-runner@v2
	        with:
	          mode: stop
	          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
	          label: ${{ needs.start-runner.outputs.label }}
	          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

moving tests from mirror to regular #67

Workflow file

moving tests from mirror to regular #67

Jobs

Run details

Workflow file for this run