Fix Falcon model, inference test in CI #810

Workflow file for this run

name: "docker-build"
on:
pull_request:
paths:
- "docker/**"
- "!docker/README.md"
- ".github/workflows/docker-build.yml"
push:
branches:
- "inference"
- "master"
schedule:
# Run every week on Sunday at midnight PT (3am ET / 8am UTC) to keep the docker images updated
- cron: "0 8 * * 0"
workflow_dispatch:
# Cancel outdated workflows if they are still running
concurrency:
group: docker-build-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
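# Note: github.head_ref is only set on pull_request events, so PR runs are grouped
# (and superseded) per source branch, while push/schedule/dispatch runs fall back to
# the unique run id and are therefore never cancelled.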
jobs:
  docker-build:
    name: Build and Install FlexFlow in a Docker Container
    runs-on: ubuntu-20.04
    strategy:
      matrix:
        gpu_backend: ["cuda", "hip_rocm"]
        gpu_backend_version: ["11.1", "11.2", "11.3", "11.4", "11.5", "11.6", "11.7", "11.8", "12.0", "5.3", "5.4", "5.5", "5.6"]
        # The CUDA version doesn't matter when building for hip_rocm, so we just pick one arbitrarily (11.8) to avoid building the hip_rocm images once per supported CUDA version
        exclude:
          - gpu_backend: "cuda"
            gpu_backend_version: "5.3"
          - gpu_backend: "cuda"
            gpu_backend_version: "5.4"
          - gpu_backend: "cuda"
            gpu_backend_version: "5.5"
          - gpu_backend: "cuda"
            gpu_backend_version: "5.6"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.1"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.2"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.3"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.4"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.5"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.6"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.7"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "11.8"
          - gpu_backend: "hip_rocm"
            gpu_backend_version: "12.0"
      fail-fast: false
    env:
      FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
      gpu_backend_version: ${{ matrix.gpu_backend_version }}
      # one of the two variables below will be unused
      cuda_version: ${{ matrix.gpu_backend_version }}
      hip_version: ${{ matrix.gpu_backend_version }}
      branch_name: ${{ github.head_ref || github.ref_name }}
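    # Generous timeout: deploy runs build FlexFlow for every supported GPU architecture, which can take several hours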
    timeout-minutes: 480
    steps:
      - name: Checkout Git Repository
        uses: actions/checkout@v3
        with:
          submodules: recursive
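      # deploy_needed: this run should build and publish the general-purpose images
      # (push/schedule/dispatch on the inference branch). build_needed: even without
      # deploying, smoke-build one representative version per backend (CUDA 11.8 or ROCm 5.6).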
      - name: Free additional space on runner
        env:
          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }}
          build_needed: ${{ ( matrix.gpu_backend == 'hip_rocm' && matrix.gpu_backend_version == '5.6' ) || ( matrix.gpu_backend == 'cuda' && matrix.gpu_backend_version == '11.8' ) }}
        run: |
          if [[ $deploy_needed == "true" || $build_needed == "true" ]]; then
            .github/workflows/helpers/free_space_on_runner.sh
          else
            echo "Skipping this step to save time"
          fi
      - name: Build Docker container
        env:
          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }}
          build_needed: ${{ ( matrix.gpu_backend == 'hip_rocm' && matrix.gpu_backend_version == '5.6' ) || ( matrix.gpu_backend == 'cuda' && matrix.gpu_backend_version == '11.8' ) }}
        run: |
          # On push to inference, build for all compatible architectures, so that we can publish
          # a pre-built general-purpose image. In all other cases, only build for one architecture
          # to save time.
          if [[ $deploy_needed == "true" ]]; then
            export FF_CUDA_ARCH=all
            export FF_HIP_ARCH=all
            ./docker/build.sh flexflow
          elif [[ $build_needed == "true" ]]; then
            export FF_CUDA_ARCH=70
            export FF_HIP_ARCH=gfx1100,gfx1036
            ./docker/build.sh flexflow
          else
            echo "Skipping build to save time"
          fi
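      # The CI runner has no GPU, so to import flexflow inside the container the step below
      # points the dynamic loader at CUDA's driver stubs (libcuda.so) in place of a real driver.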
      - name: Check availability of flexflow modules in Python
        if: ${{ matrix.gpu_backend == 'cuda' }}
        env:
          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }}
          build_needed: ${{ ( matrix.gpu_backend == 'hip_rocm' && matrix.gpu_backend_version == '5.6' ) || ( matrix.gpu_backend == 'cuda' && matrix.gpu_backend_version == '11.8' ) }}
        run: |
          if [[ $deploy_needed == "true" || $build_needed == "true" ]]; then
            if [[ $FF_GPU_BACKEND == "cuda" ]]; then
              docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${gpu_backend_version}:latest -c "export LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH; sudo ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1; python -c 'import flexflow.core; import flexflow.serve as ff; exit()'"
            else
              docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${gpu_backend_version}:latest -c "python -c 'import flexflow.core; import flexflow.serve as ff; exit()'"
            fi
          else
            echo "Skipping test to save time"
          fi
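      # Publishing pushes the images to the ghcr.io registry and is only attempted on the
      # upstream repo, where the FLEXFLOW_CONTAINER_TOKEN secret is available.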
      - name: Publish Docker environment image (on push to inference)
        if: github.repository_owner == 'flexflow'
        env:
          FLEXFLOW_CONTAINER_TOKEN: ${{ secrets.FLEXFLOW_CONTAINER_TOKEN }}
          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }}
        run: |
          if [[ $deploy_needed == "true" ]]; then
            ./docker/publish.sh flexflow-environment
            ./docker/publish.sh flexflow
          else
            echo "No need to update Docker containers in ghcr.io registry at this time."
          fi
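  # Failure alerts are limited to the weekly scheduled builds on the upstream repo, so PR failures don't ping Slack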
  notify-slack:
    name: Notify Slack in case of failure
    runs-on: ubuntu-20.04
    needs: docker-build
    if: ${{ failure() && github.event_name == 'schedule' && github.repository_owner == 'flexflow' }}
    steps:
      - name: Send Slack message
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
        run: |
          curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"Weekly FlexFlow Docker images build failed! <https://github.com/flexflow/FlexFlow/actions/runs/$GITHUB_RUN_ID|(See here).> :x: \"}" $SLACK_WEBHOOK