# Skip to content
#
# Attention projections (QKV, O) disaggregation #3846
#
# Attention projections (QKV, O) disaggregation
#
# Attention projections (QKV, O) disaggregation #3846
#
# Workflow file for this run

name: "build"

# Build workflow: compiles FlexFlow with CMake (CUDA and HIP/ROCm backends) and
# with the legacy Makefile, installs it, runs the C++ unit tests (CUDA only) and
# checks that the Python modules can be imported.
#
# Runs on pull requests and on pushes to master that touch sources, build
# configuration, dependencies or this workflow; can also be started manually.
on:
  pull_request:
    paths:
      - "include/**"
      - "inference/**"
      - "cmake/**"
      - "config/**"
      - "deps/**"
      - "python/**"
      - "src/**"
      - ".github/workflows/helpers/install_dependencies.sh"
      - ".github/workflows/build.yml"
  push:
    branches:
      - "master"
    paths:
      - "include/**"
      - "inference/**"
      - "cmake/**"
      - "config/**"
      - "deps/**"
      - "python/**"
      - "src/**"
      - ".github/workflows/helpers/install_dependencies.sh"
      - ".github/workflows/build.yml"
  workflow_dispatch:

# A newer run for the same pull-request branch cancels the one in progress.
# `github.head_ref` is only populated for pull_request events; other events fall
# back to the unique run id, so they never share a group and are not cancelled.
concurrency:
  group: build-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  # CMake build, once per GPU backend listed in the matrix.
  cmake-build:
    name: Build FlexFlow with CMake
    # NOTE(review): GitHub has retired the ubuntu-20.04 hosted image; confirm
    # this label still resolves (or that a self-hosted runner provides it).
    runs-on: ubuntu-20.04
    defaults:
      run:
        shell: bash -l {0}  # required to use an activated conda environment
    strategy:
      matrix:
        gpu_backend: ["cuda", "hip_rocm"]
      # Let the other backend finish even if one of them fails.
      fail-fast: false
    env:
      FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
    steps:
      - name: Checkout Git Repository
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Free additional space on runner
        run: .github/workflows/helpers/free_space_on_runner.sh

      # Only needed for the CUDA backend; skipped for hip_rocm.
      - name: Install CUDA
        uses: Jimver/cuda-toolkit@v0.2.16
        if: ${{ matrix.gpu_backend == 'cuda' }}
        id: cuda-toolkit
        with:
          cuda: "12.1.1"
          # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement
          use-github-cache: "false"
          log-file-suffix: 'cmake_${{matrix.gpu_backend}}.txt'

      - name: Install system dependencies
        run: .github/workflows/helpers/install_dependencies.sh

      - name: Install conda and FlexFlow dependencies
        uses: conda-incubator/setup-miniconda@v2
        with:
          activate-environment: flexflow
          environment-file: conda/flexflow.yml
          auto-activate-base: false

      # CUDA_PATH is expected to be exported by the "Install CUDA" step; that
      # step is skipped for hip_rocm, where the variable is presumably empty.
      # Examples and unit tests are only built for the CUDA backend.
      # The build uses all cores but one, with a floor of one.
      - name: Build FlexFlow
        run: |
          export CUDNN_DIR="$CUDA_PATH"
          export CUDA_DIR="$CUDA_PATH"
          export FF_HOME=$(pwd)
          export FF_CUDA_ARCH=70
          export FF_HIP_ARCH=gfx1100,gfx1036
          export hip_version=5.6
          export FF_BUILD_ALL_INFERENCE_EXAMPLES=ON
          if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
            export FF_BUILD_ALL_EXAMPLES=ON
            export FF_BUILD_UNIT_TESTS=ON
          else
            export FF_BUILD_ALL_EXAMPLES=OFF
            export FF_BUILD_UNIT_TESTS=OFF
          fi
          cores_available=$(nproc --all)
          n_build_cores=$(( cores_available -1 ))
          if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi
          mkdir -p build
          cd build
          ../config/config.linux
          make -j $n_build_cores

      # Each step runs in a fresh shell, so the build configuration is exported
      # again before re-running the configure script and installing.
      - name: Install FlexFlow
        run: |
          export CUDNN_DIR="$CUDA_PATH"
          export CUDA_DIR="$CUDA_PATH"
          export FF_HOME=$(pwd)
          export FF_CUDA_ARCH=70
          export FF_HIP_ARCH=gfx1100,gfx1036
          export hip_version=5.6
          export FF_BUILD_ALL_INFERENCE_EXAMPLES=ON
          if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
            export FF_BUILD_ALL_EXAMPLES=ON
            export FF_BUILD_UNIT_TESTS=ON
          else
            export FF_BUILD_ALL_EXAMPLES=OFF
            export FF_BUILD_UNIT_TESTS=OFF
          fi
          cd build
          ../config/config.linux
          sudo make install
          sudo ldconfig

      # The CUDA stub libcuda is put on the library path and exposed under the
      # libcuda.so.1 name (presumably because the runner has no NVIDIA driver).
      # `ln -sf` keeps the step re-runnable if the link already exists.
      - name: Run C++ unit tests
        if: ${{ matrix.gpu_backend == 'cuda' }}
        run: |
          export CUDNN_DIR="$CUDA_PATH"
          export CUDA_DIR="$CUDA_PATH"
          export LD_LIBRARY_PATH="$CUDA_PATH/lib64/stubs:$LD_LIBRARY_PATH"
          export FF_HOME=$(pwd)
          sudo ln -sf "$CUDA_PATH/lib64/stubs/libcuda.so" "$CUDA_PATH/lib64/stubs/libcuda.so.1"
          cd build
          ./tests/unit/unit-test

      - name: Check availability of flexflow modules in Python
        run: |
          if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
            export LD_LIBRARY_PATH="$CUDA_PATH/lib64/stubs:$LD_LIBRARY_PATH"
          fi
          # Remove build folder to check that the installed version can run independently of the build files
          rm -rf build
          python -c "import flexflow.core; import flexflow.serve as ff; exit()"

  # Legacy Makefile build (CUDA only; this job has no matrix).
  makefile-build:
    name: Build FlexFlow with the Makefile
    # NOTE(review): same ubuntu-20.04 availability caveat as the CMake job.
    runs-on: ubuntu-20.04
    defaults:
      run:
        shell: bash -l {0}  # required to use an activated conda environment
    steps:
      - name: Checkout Git Repository
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Free additional space on runner
        run: .github/workflows/helpers/free_space_on_runner.sh

      - name: Install CUDA
        uses: Jimver/cuda-toolkit@v0.2.16
        id: cuda-toolkit
        with:
          cuda: "12.1.1"
          use-github-cache: "false"
          # This job defines no matrix, so `matrix.gpu_backend` would expand to
          # an empty string here; the backend is spelled out instead.
          log-file-suffix: 'makefile_cuda.txt'

      - name: Install system dependencies
        run: .github/workflows/helpers/install_dependencies.sh

      - name: Install conda and FlexFlow dependencies
        uses: conda-incubator/setup-miniconda@v2
        with:
          activate-environment: flexflow
          environment-file: conda/flexflow.yml
          auto-activate-base: false

      # Builds from the python/ directory with the stub libcuda on the library
      # path, then checks that the resulting module can be imported.
      - name: Build FlexFlow
        run: |
          export CUDNN_DIR="$CUDA_PATH"
          export CUDA_DIR="$CUDA_PATH"
          export LD_LIBRARY_PATH="$CUDA_PATH/lib64/stubs:$LD_LIBRARY_PATH"
          sudo ln -sf "$CUDA_PATH/lib64/stubs/libcuda.so" "$CUDA_PATH/lib64/stubs/libcuda.so.1"
          export FF_HOME=$(pwd)
          cores_available=$(nproc --all)
          n_build_cores=$(( cores_available -1 ))
          if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi
          cd python
          make -j $n_build_cores
          python -c 'import flexflow.core'