Fuse inference kernels to reduce kernel launch overhead #815

Workflow file for this run

.github/workflows/docker-build.yml at 5bbe29e

	# Builds the FlexFlow Docker images (and, on the "inference" branch,
	# publishes them).
	name: "docker-build"
	on:
	  # Pull requests only trigger a build when they touch the Docker setup or
	  # this workflow file; changes to the Docker README alone are ignored.
	  pull_request:
	    paths:
	      - "docker/**"
	      - "!docker/README.md"
	      - ".github/workflows/docker-build.yml"
	  # Pushes to the two long-lived branches always trigger a build.
	  push:
	    branches:
	      - "inference"
	      - "master"
	  schedule:
	    # Run every week on Sunday at midnight PT (3am ET / 8am UTC) to keep the docker images updated
	    - cron: "0 8 * * 0"
	  # Allow the workflow to be started manually.
	  workflow_dispatch:

	# Cancel outdated workflows if they are still running.
	# The group key is the expression below: github.head_ref when it is set,
	# otherwise github.run_id. Runs that evaluate to the same key share a group,
	# and a newer run cancels an in-progress one in that group.
	concurrency:
	  group: docker-build-${{ github.head_ref || github.run_id }}
	  cancel-in-progress: true

	jobs:
	  # Builds one Docker image per (GPU backend, backend version) pair, runs an
	  # import smoke test for CUDA images, and publishes the images when the run
	  # qualifies as a deployment (see deploy_needed below).
	  docker-build:
	    name: Build and Install FlexFlow in a Docker Container
	    runs-on: ubuntu-20.04
	    strategy:
	      matrix:
	        gpu_backend: ["cuda", "hip_rocm"]
	        gpu_backend_version: ["11.1", "11.2", "11.3", "11.4", "11.5", "11.6", "11.7", "11.8", "12.0", "5.3", "5.4", "5.5", "5.6"]
	        # gpu_backend_version lists the 11.x/12.0 versions together with the
	        # 5.x versions. The exclusions below drop every pair that combines
	        # "cuda" with a 5.x version or "hip_rocm" with an 11.x/12.0 version,
	        # so each backend is only built against its own versions.
	        exclude:
	          - gpu_backend: "cuda"
	            gpu_backend_version: "5.3"
	          - gpu_backend: "cuda"
	            gpu_backend_version: "5.4"
	          - gpu_backend: "cuda"
	            gpu_backend_version: "5.5"
	          - gpu_backend: "cuda"
	            gpu_backend_version: "5.6"
	          - gpu_backend: "hip_rocm"
	            gpu_backend_version: "11.1"
	          - gpu_backend: "hip_rocm"
	            gpu_backend_version: "11.2"
	          - gpu_backend: "hip_rocm"
	            gpu_backend_version: "11.3"
	          - gpu_backend: "hip_rocm"
	            gpu_backend_version: "11.4"
	          - gpu_backend: "hip_rocm"
	            gpu_backend_version: "11.5"
	          - gpu_backend: "hip_rocm"
	            gpu_backend_version: "11.6"
	          - gpu_backend: "hip_rocm"
	            gpu_backend_version: "11.7"
	          - gpu_backend: "hip_rocm"
	            gpu_backend_version: "11.8"
	          - gpu_backend: "hip_rocm"
	            gpu_backend_version: "12.0"
	      # Let the remaining matrix entries finish even if one of them fails.
	      fail-fast: false
	    env:
	      FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
	      gpu_backend_version: ${{ matrix.gpu_backend_version }}
	      # one of the two variables below will be unused
	      cuda_version: ${{ matrix.gpu_backend_version }}
	      hip_version: ${{ matrix.gpu_backend_version }}
	      branch_name: ${{ github.head_ref || github.ref_name }}
	    timeout-minutes: 480
	    steps:
	      - name: Checkout Git Repository
	        uses: actions/checkout@v3
	        with:
	          submodules: recursive

	      # The same two flags are recomputed in each step that needs them:
	      #   deploy_needed - push / schedule / manual run on the "inference" branch
	      #   build_needed  - the single matrix entry per backend that is always
	      #                   built (hip_rocm 5.6, cuda 11.8)
	      - name: Free additional space on runner
	        env:
	          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }}
	          build_needed: ${{ ( matrix.gpu_backend == 'hip_rocm' && matrix.gpu_backend_version == '5.6' ) || ( matrix.gpu_backend == 'cuda' && matrix.gpu_backend_version == '11.8' ) }}
	        run: |
	          if [[ $deploy_needed == "true" || $build_needed == "true" ]]; then
	            .github/workflows/helpers/free_space_on_runner.sh
	          else
	            echo "Skipping this step to save time"
	          fi

	      - name: Build Docker container
	        env:
	          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }}
	          build_needed: ${{ ( matrix.gpu_backend == 'hip_rocm' && matrix.gpu_backend_version == '5.6' ) || ( matrix.gpu_backend == 'cuda' && matrix.gpu_backend_version == '11.8' ) }}
	        run: |
	          # On push to inference, build for all compatible architectures, so that we can publish
	          # a pre-built general-purpose image. On all other cases, only build for one architecture
	          # to save time.
	          if [[ $deploy_needed == "true" ]] ; then
	            export FF_CUDA_ARCH=all
	            export FF_HIP_ARCH=all
	            ./docker/build.sh flexflow
	          elif [[ $build_needed == "true" ]]; then
	            export FF_CUDA_ARCH=70
	            export FF_HIP_ARCH=gfx1100,gfx1036
	            ./docker/build.sh flexflow
	          else
	            echo "Skipping build to save time"
	          fi

	      # NOTE(review): the step-level `if` restricts this step to the cuda
	      # backend, so the non-cuda branch of the script below is currently
	      # unreachable.
	      # NOTE(review): the `-c` argument is double-quoted, so $LD_LIBRARY_PATH is
	      # expanded by the runner's shell before `docker run` starts, not inside
	      # the container - confirm this is intended.
	      - name: Check availability of flexflow modules in Python
	        if: ${{ matrix.gpu_backend == 'cuda' }}
	        env:
	          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }}
	          build_needed: ${{ ( matrix.gpu_backend == 'hip_rocm' && matrix.gpu_backend_version == '5.6' ) || ( matrix.gpu_backend == 'cuda' && matrix.gpu_backend_version == '11.8' ) }}
	        run: |
	          if [[ $deploy_needed == "true" || $build_needed == "true" ]]; then
	            if [[ $FF_GPU_BACKEND == "cuda" ]]; then
	              docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${gpu_backend_version}:latest -c "export LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH; sudo ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1; python -c 'import flexflow.core; import flexflow.serve as ff; exit()'"
	            else
	              docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${gpu_backend_version}:latest -c "python -c 'import flexflow.core; import flexflow.serve as ff; exit()'"
	            fi
	          else
	            echo "Skipping test to save time"
	          fi

	      # Only runs in repositories owned by "flexflow"; the script itself then
	      # publishes only when deploy_needed is true.
	      - name: Publish Docker environment image (on push to inference)
	        if: github.repository_owner == 'flexflow'
	        env:
	          FLEXFLOW_CONTAINER_TOKEN: ${{ secrets.FLEXFLOW_CONTAINER_TOKEN }}
	          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }}
	        run: |
	          if [[ $deploy_needed == "true" ]]; then
	            ./docker/publish.sh flexflow-environment
	            ./docker/publish.sh flexflow
	          else
	            echo "No need to update Docker containers in ghcr.io registry at this time."
	          fi

	  # Posts a message to Slack when the scheduled (weekly) build fails in a
	  # repository owned by "flexflow".
	  notify-slack:
	    name: Notify Slack in case of failure
	    runs-on: ubuntu-20.04
	    needs: docker-build
	    if: ${{ failure() && github.event_name == 'schedule' && github.repository_owner == 'flexflow' }}
	    steps:
	      - name: Send Slack message
	        env:
	          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
	        run: |
	          # The "<url|text>" form renders as a link in Slack; the webhook URL is
	          # quoted so the shell passes it to curl as a single argument.
	          curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"Weekly FlexFlow Docker images build failed! <https://github.com/flexflow/FlexFlow/actions/runs/$GITHUB_RUN_ID|(See here).> :x: \"}" "$SLACK_WEBHOOK"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Fuse inference kernels to reduce kernel launch overhead #815

Workflow file

Fuse inference kernels to reduce kernel launch overhead #815

Jobs

Run details

Workflow file for this run