Fuse inference kernels to reduce kernel launch overhead #815
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the FlexFlow Docker images (and publishes them from the "inference"
# branch). Triggered by Docker-related pull requests, pushes to the main
# branches, a weekly schedule, and manual dispatch.
name: "docker-build"
on:
  pull_request:
    # Only run for PRs that touch the Docker setup or this workflow itself;
    # the leading "!" pattern excludes README-only changes.
    paths:
      - "docker/**"
      - "!docker/README.md"
      - ".github/workflows/docker-build.yml"
  push:
    branches:
      - "inference"
      - "master"
  schedule:
    # Run every week on Sunday at midnight PT (3am ET / 8am UTC) to keep the docker images updated
    - cron: "0 8 * * 0"
  workflow_dispatch:
# Cancel outdated workflows if they are still running.
# Runs triggered by a pull request share a group keyed on the PR's head branch
# (github.head_ref); for other events head_ref is empty, so the group falls
# back to the unique run id.
concurrency:
  group: docker-build-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # Builds the FlexFlow Docker image for each supported GPU backend/version
  # pair, smoke-tests the Python modules, and publishes the images when the
  # run qualifies for deployment (see `deploy_needed` below).
  docker-build:
    name: Build and Install FlexFlow in a Docker Container
    # NOTE(review): the ubuntu-20.04 hosted runner image is believed to have
    # been retired by GitHub; confirm and migrate to a supported image.
    runs-on: ubuntu-20.04
    strategy:
      matrix:
        gpu_backend: ["cuda", "hip_rocm"]
        gpu_backend_version: ["11.1", "11.2", "11.3", "11.4", "11.5", "11.6", "11.7", "11.8", "12.0", "5.3", "5.4", "5.5", "5.6"]
        # gpu_backend_version mixes CUDA toolkit versions (11.x / 12.0) and
        # ROCm versions (5.x). The exclude list removes every mismatched pair,
        # so "cuda" is only combined with 11.x / 12.0 and "hip_rocm" only
        # with 5.x.
        exclude:
          - gpu_backend: "cuda"
            gpu_backend_version: "5.3"
          - gpu_backend: "cuda"
            gpu_backend_version: "5.4"
          - gpu_backend: "cuda"
            gpu_backend_version: "5.5"
          - gpu_backend: "cuda"
            gpu_backend_version: "5.6"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.1"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.2"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.3"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.4"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.5"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.6"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.7"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.8"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "12.0"
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
    env:
      FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
      gpu_backend_version: ${{ matrix.gpu_backend_version }}
      # one of the two variables below will be unused
      cuda_version: ${{ matrix.gpu_backend_version }}
      hip_version: ${{ matrix.gpu_backend_version }}
      # PR head branch for pull_request events, otherwise the pushed ref name.
      branch_name: ${{ github.head_ref || github.ref_name }}
    # NOTE(review): GitHub-hosted runners limit a job to 6 hours, so a value
    # above 360 may not take effect there; confirm.
    timeout-minutes: 480
    steps:
      - name: Checkout Git Repository
        uses: actions/checkout@v3
        with:
          submodules: recursive

      # The steps below share two flags, evaluated to the strings "true"/"false":
      #   deploy_needed: push / schedule / manual run on the "inference" branch.
      #   build_needed:  the one representative entry per backend
      #                  (hip_rocm 5.6, cuda 11.8) that is always built.
      - name: Free additional space on runner
        env:
          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }}
          build_needed: ${{ ( matrix.gpu_backend == 'hip_rocm' && matrix.gpu_backend_version == '5.6' ) || ( matrix.gpu_backend == 'cuda' && matrix.gpu_backend_version == '11.8' ) }}
        run: |
          if [[ $deploy_needed == "true" || $build_needed == "true" ]]; then
            .github/workflows/helpers/free_space_on_runner.sh
          else
            echo "Skipping this step to save time"
          fi

      - name: Build Docker container
        env:
          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }}
          build_needed: ${{ ( matrix.gpu_backend == 'hip_rocm' && matrix.gpu_backend_version == '5.6' ) || ( matrix.gpu_backend == 'cuda' && matrix.gpu_backend_version == '11.8' ) }}
        run: |
          # On push to inference, build for all compatible architectures, so that we can publish
          # a pre-built general-purpose image. On all other cases, only build for one architecture
          # to save time.
          if [[ $deploy_needed == "true" ]] ; then
            export FF_CUDA_ARCH=all
            export FF_HIP_ARCH=all
            ./docker/build.sh flexflow
          elif [[ $build_needed == "true" ]]; then
            export FF_CUDA_ARCH=70
            export FF_HIP_ARCH=gfx1100,gfx1036
            ./docker/build.sh flexflow
          else
            echo "Skipping build to save time"
          fi

      - name: Check availability of flexflow modules in Python
        # This step only runs for the CUDA backend, so the non-CUDA branch in
        # the script below is currently unreachable; it is kept so that the
        # check still works if this condition is ever lifted.
        if: ${{ matrix.gpu_backend == 'cuda' }}
        env:
          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }}
          build_needed: ${{ ( matrix.gpu_backend == 'hip_rocm' && matrix.gpu_backend_version == '5.6' ) || ( matrix.gpu_backend == 'cuda' && matrix.gpu_backend_version == '11.8' ) }}
        run: |
          if [[ $deploy_needed == "true" || $build_needed == "true" ]]; then
            if [[ $FF_GPU_BACKEND == "cuda" ]]; then
              # The runner has no GPU driver, so expose the CUDA stub libcuda inside the container.
              # \$LD_LIBRARY_PATH is escaped so that it expands inside the container rather than
              # being replaced by the runner's (host) value before docker is invoked.
              docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${gpu_backend_version}:latest -c "export LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:\$LD_LIBRARY_PATH; sudo ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1; python -c 'import flexflow.core; import flexflow.serve as ff; exit()'"
            else
              docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${gpu_backend_version}:latest -c "python -c 'import flexflow.core; import flexflow.serve as ff; exit()'"
            fi
          else
            echo "Skipping test to save time"
          fi

      - name: Publish Docker environment image (on push to inference)
        # Forks do not publish images.
        if: github.repository_owner == 'flexflow'
        env:
          FLEXFLOW_CONTAINER_TOKEN: ${{ secrets.FLEXFLOW_CONTAINER_TOKEN }}
          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }}
        run: |
          if [[ $deploy_needed == "true" ]]; then
            ./docker/publish.sh flexflow-environment
            ./docker/publish.sh flexflow
          else
            echo "No need to update Docker containers in ghcr.io registry at this time."
          fi

  # Posts a Slack alert when the scheduled (weekly) build fails in the
  # upstream repository.
  notify-slack:
    name: Notify Slack in case of failure
    runs-on: ubuntu-20.04
    needs: docker-build
    if: ${{ failure() && github.event_name == 'schedule' && github.repository_owner == 'flexflow' }}
    steps:
      - name: Send Slack message
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
        run: |
          # Quote the webhook URL so that it is always passed to curl as a single argument.
          curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"Weekly FlexFlow Docker images build failed! <https://github.com/flexflow/FlexFlow/actions/runs/$GITHUB_RUN_ID|(See here).> :x: \"}" "$SLACK_WEBHOOK"