Download build artifacts from the backport branch for testing in the main
branch
#34
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow: build the wheels, test them on GPU runners, and build the docs.
name: 'CI: Build and test'

# Runs on `main` get a group name that embeds the run id, so each one is in
# its own group. Every other ref shares one group per (event, ref), so a
# newer push to the same ref cancels the run that is still in progress.
concurrency:
  group: >-
    ${{ github.workflow }}-${{
    github.ref_name == 'main' && format('ci-main-build-test-{0}', github.run_id) ||
    format('ci-pr-build-test-on-{0}-against-branch-{1}', github.event_name, github.ref_name)
    }}
  cancel-in-progress: true

# Triggered by pushes to `main` and to `pull-request/<number>` branches.
on:
  push:
    branches:
      - 'pull-request/[0-9]+'
      - 'main'

jobs:
build:
  # Builds the cuda.core and cuda.bindings wheels for every
  # (host-platform, python-version) pair. The wheels are published as GHA
  # Artifacts, or as GHA Cache entries when the run is on `main`
  # (see USE_CACHE in "Set environment variables").
  strategy:
    fail-fast: false
    matrix:
      host-platform:
        - linux-64
        - linux-aarch64
        - win-64
      python-version:
        - "3.13"
        - "3.12"
        - "3.11"
        - "3.10"
        - "3.9"
      cuda-version:
        # Note: this is for build-time only.
        - "12.6.2"
  name: Build (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }})
  if: ${{ github.repository_owner == 'nvidia' }}
  permissions:
    id-token: write  # This is required for configure-aws-credentials
    contents: read   # This is required for actions/checkout
    actions: write   # This is required for `gh cache delete` in the "Prepare ... cache" steps
  runs-on: ${{ (matrix.host-platform == 'linux-64' && 'linux-amd64-cpu8') ||
    (matrix.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') ||
    (matrix.host-platform == 'win-64' && 'windows-2019') }}
  # (matrix.host-platform == 'win-64' && 'windows-amd64-cpu8') }}
  outputs:
    # Build-time CTK version, consumed by the downstream jobs.
    BUILD_CTK_VER: ${{ steps.pass_env.outputs.CUDA_VERSION }}
  defaults:
    run:
      shell: bash --noprofile --norc -xeuo pipefail {0}
  steps:
    - name: Checkout ${{ github.event.repository.name }}
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    # WAR: setup-python is not relocatable...
    # see https://github.com/actions/setup-python/issues/871
    # NOTE(review): the step name shows the matrix Python version, but 3.12 is
    # pinned below. Presumably intentional, since the target interpreter is
    # selected through CIBW_BUILD -- confirm.
    - name: Set up Python ${{ matrix.python-version }}
      if: ${{ startsWith(matrix.host-platform, 'linux') }}
      id: setup-python
      uses: actions/setup-python@v5
      with:
        python-version: "3.12"

    - name: Set up MSVC
      if: ${{ startsWith(matrix.host-platform, 'win') }}
      uses: ilammy/msvc-dev-cmd@v1

    # Exports, through GITHUB_ENV, the names and paths shared by later steps.
    - name: Set environment variables
      run: |
        # "3.12" -> "312", as used in cibuildwheel build identifiers.
        PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.')
        if [[ "${{ matrix.host-platform }}" == linux* ]]; then
          CIBW_BUILD="cp${PYTHON_VERSION_FORMATTED}-manylinux*"
          REPO_DIR=$(pwd)
        elif [[ "${{ matrix.host-platform }}" == win* ]]; then
          CIBW_BUILD="cp${PYTHON_VERSION_FORMATTED}-win_amd64"
          PWD=$(pwd)
          # Quoted so that a path containing spaces is passed as one argument.
          REPO_DIR=$(cygpath -w "$PWD")
        fi

        echo "PARALLEL_LEVEL=$(nproc)" >> $GITHUB_ENV
        CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}"
        echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV
        echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
        echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV
        CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ matrix.cuda-version }}-${{ matrix.host-platform }}"
        echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV
        echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
        echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV
        echo "CIBW_BUILD=${CIBW_BUILD}" >> $GITHUB_ENV

        # When the CI is run due to merging to main, we want it to populate GHA Cache not Artifacts,
        # so that CI workflows running on every branch have a fallback to use.
        # (The temporary unconditional USE_CACHE=1 override that used to follow
        # this block was marked "revert before merging" and has been removed.)
        if [[ "${{ github.ref_name}}" == main ]]; then
          echo "USE_CACHE=1" >> $GITHUB_ENV
        else
          echo "USE_CACHE=0" >> $GITHUB_ENV
        fi

    # Installs the packages listed in `dependencies` with apt when any of the
    # executables listed in `dependent_exes` is missing from PATH.
    - name: Install dependencies
      if: ${{ env.USE_CACHE == '1' }}
      run: |
        # For GHA Cache
        dependencies=(zstd)
        dependent_exes=(zstd)

        not_found=0
        for dep in "${dependent_exes[@]}"; do
          # Probe the executable being iterated over (this used to probe
          # `curl` regardless of $dep).
          if ! (command -v "${dep}" >/dev/null 2>&1); then
            not_found=1
            break
          fi
        done
        if [[ $not_found == 0 ]]; then
          echo "All dependencies are found. Do nothing."
          exit 0
        fi

        # Use sudo when it is available; otherwise require that we are root.
        if ! (command -v sudo >/dev/null 2>&1); then
          if [[ $EUID == 0 ]]; then
            alias SUDO=""
          else
            echo "The following oprations require root access."
            exit 1
          fi
        else
          alias SUDO="sudo"
        fi
        # Aliases are not expanded in non-interactive shells unless enabled.
        shopt -s expand_aliases
        SUDO apt update
        SUDO apt install -y "${dependencies[@]}"

    - name: Dump environment
      run: |
        env

    - name: Build cuda.core wheel
      uses: pypa/cibuildwheel@v2.22.0
      env:
        CIBW_BUILD: ${{ env.CIBW_BUILD }}
        CIBW_ARCHS_LINUX: "native"
        CIBW_BUILD_VERBOSITY: 1
      with:
        package-dir: ./cuda_core/
        output-dir: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}

    # Takes ownership of the output directory (through sudo except on
    # Windows) and lists its contents.
    - name: List the cuda.core artifacts directory
      run: |
        if [[ "${{ matrix.host-platform }}" == win* ]]; then
          export CHOWN=chown
        else
          export CHOWN="sudo chown"
        fi
        $CHOWN -R $(whoami) ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
        ls -lahR ${{ env.CUDA_CORE_ARTIFACTS_DIR }}

    - name: Check cuda.core wheel
      run: |
        pip install twine
        twine check ${{ env.CUDA_CORE_ARTIFACTS_DIR }}/*.whl

    - name: Upload cuda.core build artifacts
      if: ${{ env.USE_CACHE == '0' }}
      uses: actions/upload-artifact@v4
      with:
        name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
        path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}/*.whl
        if-no-files-found: error
        overwrite: 'true'

    # Packs the wheel directory into a single archive for the cache step and
    # removes any existing cache entry that has the same key.
    - name: Prepare cuda.core cache
      if: ${{ env.USE_CACHE == '1' }}
      env:
        # The GitHub CLI refuses to run inside Actions without a token.
        GH_TOKEN: ${{ github.token }}
      run: |
        # this file is uploaded to GHA Cache
        # (no compression flag is passed to tar, so despite the .tar.gz name
        # the archive is a plain tar)
        tar -c -f "${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz" -C "${{ env.CUDA_CORE_ARTIFACTS_DIR }}" .
        du -h "${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz"
        # check if the previous runs from the same PR have populated the cache, if so need to clean it up
        CACHE_KEY=${{ env.CUDA_CORE_ARTIFACT_NAME }}
        # `grep -c` always prints a count (0 when nothing matches or when gh
        # fails), so the comparison never sees an empty operand. The raised
        # limit avoids missing the key behind the default 30-entry listing.
        if [[ "$(gh cache list --limit 1000 | grep -cF -- "$CACHE_KEY")" == "1" ]]; then
          gh cache delete "$CACHE_KEY"
        fi

    - name: Cache cuda.core build artifacts
      if: ${{ env.USE_CACHE == '1' }}
      uses: actions/cache/save@v4
      with:
        key: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
        path: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz

    - name: Set up mini CTK
      uses: ./.github/actions/fetch_ctk
      continue-on-error: false
      with:
        host-platform: ${{ matrix.host-platform }}
        cuda-version: ${{ matrix.cuda-version }}

    - name: Build cuda.bindings wheel
      uses: pypa/cibuildwheel@v2.22.0
      env:
        CIBW_BUILD: ${{ env.CIBW_BUILD }}
        CIBW_ARCHS_LINUX: "native"
        CIBW_BUILD_VERBOSITY: 1
        # CIBW mounts the host filesystem under /host
        CIBW_ENVIRONMENT_LINUX: >
          CUDA_PATH=/host/${{ env.CUDA_PATH }}
          PARALLEL_LEVEL=${{ env.PARALLEL_LEVEL }}
        CIBW_ENVIRONMENT_WINDOWS: >
          CUDA_HOME="$(cygpath -w ${{ env.CUDA_PATH }})"
        # PARALLEL_LEVEL=${{ env.PARALLEL_LEVEL }}
      with:
        package-dir: ./cuda_bindings/
        output-dir: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}

    # Same ownership fix-up and listing as for cuda.core above.
    - name: List the cuda.bindings artifacts directory
      run: |
        if [[ "${{ matrix.host-platform }}" == win* ]]; then
          export CHOWN=chown
        else
          export CHOWN="sudo chown"
        fi
        $CHOWN -R $(whoami) ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
        ls -lahR ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}

    # TODO: enable this after NVIDIA/cuda-python#297 is resolved
    # - name: Check cuda.bindings wheel
    #   run: |
    #     twine check ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl

    # Same archive-and-cleanup procedure as "Prepare cuda.core cache".
    - name: Prepare cuda.bindings cache
      if: ${{ env.USE_CACHE == '1' }}
      env:
        # The GitHub CLI refuses to run inside Actions without a token.
        GH_TOKEN: ${{ github.token }}
      run: |
        # this file is uploaded to GHA Cache
        # (no compression flag is passed to tar, so despite the .tar.gz name
        # the archive is a plain tar)
        tar -c -f "${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz" -C "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" .
        du -h "${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz"
        # check if the previous runs from the same PR have populated the cache, if so need to clean it up
        CACHE_KEY=${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
        if [[ "$(gh cache list --limit 1000 | grep -cF -- "$CACHE_KEY")" == "1" ]]; then
          gh cache delete "$CACHE_KEY"
        fi

    - name: Upload cuda.bindings build artifacts
      if: ${{ env.USE_CACHE == '0' }}
      uses: actions/upload-artifact@v4
      with:
        name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
        path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl
        if-no-files-found: error
        overwrite: 'true'

    - name: Cache cuda.bindings build artifacts
      if: ${{ env.USE_CACHE == '1' }}
      uses: actions/cache/save@v4
      with:
        key: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
        path: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz

    # Feeds the job-level BUILD_CTK_VER output declared above.
    - name: Pass environment variables to the next runner
      id: pass_env
      run: |
        echo "CUDA_VERSION=${{ matrix.cuda-version }}" >> $GITHUB_OUTPUT
test:
  # Installs the wheels produced by the `build` job and runs the
  # cuda.bindings and cuda.core test suites on GPU runners.
  strategy:
    fail-fast: false
    # TODO: add driver version here
    matrix:
      host-platform:
        - linux-64
        - linux-aarch64
        # TODO: enable testing once win-64 GPU runners are up
        # - win-64
      python-version:
        - "3.13"
        - "3.12"
        - "3.11"
        - "3.10"
        - "3.9"
      cuda-version:
        # Note: this is for test-time only.
        - "12.6.2"
        - "12.0.1"
        - "11.8.0"
      runner:
        - default
      include:
        # One extra combination on the H100 runner.
        - host-platform: linux-64
          python-version: "3.12"
          cuda-version: "12.6.2"
          runner: H100
  name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }})
  # The build stage could fail but we want the CI to keep moving.
  if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
  permissions:
    id-token: write  # This is required for configure-aws-credentials
    contents: read   # This is required for actions/checkout
  runs-on: ${{ (matrix.runner == 'default' && matrix.host-platform == 'linux-64' && 'linux-amd64-gpu-v100-latest-1') ||
    (matrix.runner == 'default' && matrix.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') ||
    (matrix.runner == 'H100' && 'linux-amd64-gpu-h100-latest-1-testing') }}
  # Our self-hosted runners require a container
  # TODO: use a different (nvidia?) container
  container:
    options: -u root --security-opt seccomp=unconfined --shm-size 16g
    image: ubuntu:22.04
    env:
      NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
  needs:
    - build
  defaults:
    run:
      shell: bash --noprofile --norc -xeuo pipefail {0}
  steps:
    - name: Ensure GPU is working
      run: nvidia-smi

    - name: Checkout ${{ github.event.repository.name }}
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    # Recomputes the artifact names and directories used by the build job
    # and decides whether the cuda.bindings tests can run.
    - name: Set environment variables
      run: |
        PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.')
        if [[ "${{ matrix.host-platform }}" == linux* ]]; then
          REPO_DIR=$(pwd)
        elif [[ "${{ matrix.host-platform }}" == win* ]]; then
          PWD=$(pwd)
          REPO_DIR=$(cygpath -w "$PWD")
        fi

        # The cuda.bindings tests only run when the CTK major version used
        # at build time equals the one under test.
        BUILD_CUDA_MAJOR="$(cut -d '.' -f 1 <<< "${{ needs.build.outputs.BUILD_CTK_VER }}")"
        TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< "${{ matrix.cuda-version }}")"
        if [[ $BUILD_CUDA_MAJOR != $TEST_CUDA_MAJOR ]]; then
          SKIP_CUDA_BINDINGS_TEST=1
        else
          SKIP_CUDA_BINDINGS_TEST=0
        fi

        # make outputs from the previous job as env vars
        CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}"
        echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV
        echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
        echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV
        # The build job names the bindings artifact after its own (build-time)
        # CUDA version, so the lookup must use the build job's output and not
        # this job's test-time matrix value.
        CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ needs.build.outputs.BUILD_CTK_VER }}-${{ matrix.host-platform }}"
        echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV
        echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
        echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV
        echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV

    # We'll try GHA Artifacts first, and then fall back to GHA Cache
    - name: Download cuda.bindings build artifacts
      id: cuda-bindings-download
      # A failed download must not fail the job, otherwise every step after
      # the cache fallback would be skipped by the implicit success() check.
      continue-on-error: true
      uses: actions/download-artifact@v4
      with:
        name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
        path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}

    - name: Restore cuda.bindings cache
      # `outcome` is the result before continue-on-error is applied.
      if: ${{ steps.cuda-bindings-download.outcome == 'failure' }}
      id: cuda-bindings-cache
      uses: actions/cache/restore@v4
      with:
        key: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
        path: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz
        restore-keys: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}
        fail-on-cache-miss: true

    # Unpacks the restored archive into the artifacts directory.
    - name: Report cache restore status (hit)
      # `cache-hit` is 'true' only for an exact key match; a restore through
      # `restore-keys` still has to be unpacked, so test the matched key.
      if: ${{ steps.cuda-bindings-cache.conclusion != 'skipped' &&
        steps.cuda-bindings-cache.outputs.cache-matched-key != '' }}
      run: |
        echo "cache is found"
        CACHE_DIR="${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}"
        CACHE_ARCHIVE="${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz"
        ls -l $CACHE_ARCHIVE
        mkdir -p $CACHE_DIR
        du -h $CACHE_ARCHIVE &&
        tar -x -f $CACHE_ARCHIVE -C $CACHE_DIR &&
        rm -f $CACHE_ARCHIVE || echo "WARNING: cache could not be retrieved."

    - name: Display structure of downloaded cuda.bindings artifacts
      run: |
        pwd
        ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR

    # Same artifact-then-cache procedure for cuda.core.
    - name: Download cuda.core build artifacts
      id: cuda-core-download
      # See the cuda.bindings download step for why this is tolerated.
      continue-on-error: true
      uses: actions/download-artifact@v4
      with:
        name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
        path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}

    - name: Restore cuda.core cache
      if: ${{ steps.cuda-core-download.outcome == 'failure' }}
      id: cuda-core-cache
      uses: actions/cache/restore@v4
      with:
        key: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
        path: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz
        restore-keys: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}
        fail-on-cache-miss: true

    - name: Report cache restore status (hit)
      if: ${{ steps.cuda-core-cache.conclusion != 'skipped' &&
        steps.cuda-core-cache.outputs.cache-matched-key != '' }}
      run: |
        echo "cache is found"
        CACHE_DIR="${{ env.CUDA_CORE_ARTIFACTS_DIR }}"
        CACHE_ARCHIVE="${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz"
        ls -l $CACHE_ARCHIVE
        mkdir -p $CACHE_DIR
        du -h $CACHE_ARCHIVE &&
        tar -x -f $CACHE_ARCHIVE -C $CACHE_DIR &&
        rm -f $CACHE_ARCHIVE || echo "WARNING: cache could not be retrieved."

    - name: Display structure of downloaded cuda.core build artifacts
      run: |
        pwd
        ls -lahR $CUDA_CORE_ARTIFACTS_DIR

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Set up mini CTK
      uses: ./.github/actions/fetch_ctk
      continue-on-error: false
      with:
        host-platform: ${{ matrix.host-platform }}
        cuda-version: ${{ matrix.cuda-version }}

    - name: Run cuda.bindings tests
      if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
      run: |
        ls $CUDA_PATH

        pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}"
        pip install *.whl
        popd

        pushd ./cuda_bindings
        pip install -r requirements.txt
        pytest -rxXs tests/
        # TODO: enable cython tests
        #pytest tests/cython
        popd

    - name: Run cuda.core tests
      run: |
        if [[ ${{ matrix.python-version }} == "3.13" ]]; then
          # TODO: remove this hack once cuda-python has a cp313 build
          if [[ $SKIP_CUDA_BINDINGS_TEST == 1 ]]; then
            echo "Python 3.13 + cuda-python ${{ matrix.cuda-version }} is not supported, skipping the test..."
            exit 0
          fi
        fi

        # If build/test majors match: cuda.bindings is installed in the previous step.
        # If mismatch: cuda.bindings is installed from PyPI.
        TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< "${{ matrix.cuda-version }}")"
        pushd "${CUDA_CORE_ARTIFACTS_DIR}"
        # Installs the wheel with the "cu<major>" extra, e.g. foo.whl[cu12].
        pip install $(ls *.whl)["cu${TEST_CUDA_MAJOR}"]
        popd

        pushd ./cuda_core
        pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt"
        pytest -rxXs tests/
        popd
doc:
  # Delegates to the reusable docs workflow, handing it the CTK version that
  # the `build` job exported.
  name: Docs
  needs:
    - build
  # The build stage could fail but we want the CI to keep moving.
  if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
  # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
  permissions:
    id-token: write
    contents: write
  uses: ./.github/workflows/build-docs.yml
  secrets: inherit
  with:
    build_ctk_ver: ${{ needs.build.outputs.BUILD_CTK_VER }}