# pythonapp-gpu.yml (forked from Project-MONAI/MONAI)
# Jenkinsfile.monai-premerge
name: premerge-gpu

on:
  # quick tests for pull requests and the releasing branches
  push:
    branches:
      - main
      - releasing/*
  pull_request:
    types: [opened, synchronize, closed]

concurrency:
  # automatically cancel the previously triggered workflows when there's a newer version
  group: build-gpu-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
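  # the group key is the PR number for pull_request events and falls back to the
  # branch ref for push events, so each PR (or branch) only keeps its latest run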

jobs:
  GPU-quick-py3:  # GPU with full dependencies
    if: ${{ github.repository == 'Project-MONAI/MONAI' && github.event.pull_request.merged != true }}
    strategy:
      matrix:
        environment:
          - "PT18+CUDA102"
          - "PT19+CUDA114DOCKER"
          - "PT110+CUDA111"
          - "PT112+CUDA118DOCKER"
          - "PT113+CUDA116"
        include:
          # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes
          - environment: PT18+CUDA102
            # pytorch 1.8.2 LTS
            pytorch: "torch==1.8.2 torchvision==0.9.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cu102"
            base: "nvcr.io/nvidia/cuda:10.2-devel-ubuntu18.04"
          - environment: PT19+CUDA114DOCKER
            # 21.10: 1.10.0a0+0aef44c
            pytorch: "-h"  # we explicitly set pytorch to -h to avoid pip install error
            base: "nvcr.io/nvidia/pytorch:21.10-py3"
          - environment: PT110+CUDA111
            pytorch: "torch==1.10.2 torchvision==0.11.3 --extra-index-url https://download.pytorch.org/whl/cu111"
            base: "nvcr.io/nvidia/cuda:11.1.1-devel-ubuntu18.04"
          - environment: PT112+CUDA118DOCKER
            # 22.09: 1.13.0a0+d0d6b1f
            pytorch: "-h"  # we explicitly set pytorch to -h to avoid pip install error
            base: "nvcr.io/nvidia/pytorch:22.09-py3"
          - environment: PT113+CUDA116
            pytorch: "torch==1.13.1 torchvision==0.14.1"
            base: "nvcr.io/nvidia/cuda:11.6.1-devel-ubuntu18.04"
    container:
      image: ${{ matrix.base }}
      options: --gpus all --env NVIDIA_DISABLE_REQUIRE=true  # workaround for unsatisfied condition: cuda>=11.6
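      # NVIDIA_DISABLE_REQUIRE turns off the container runtime's "cuda>=X" driver
      # requirement check, so images built against newer CUDA still start on the
      # runner's existing driver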
    runs-on: [self-hosted, linux, x64, common]
    steps:
      - uses: actions/checkout@v3
      - name: apt install
        if: github.event.pull_request.merged != true
        run: |
          # FIXME: workaround for https://github.com/Project-MONAI/MONAI/issues/4200
          apt-key del 7fa2af80 && rm -rf /etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/cuda.list
          apt-get update
          apt-get install -y wget
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb
          dpkg -i cuda-keyring_1.0-1_all.deb
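          # cuda-keyring installs NVIDIA's rotated repository signing key, replacing
          # the revoked 7fa2af80 key deleted above
          # the plain CUDA "devel" base images do not ship the Python/pip toolchain used
          # below, so set up Python 3.7 there; the NGC pytorch images already include one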
          if [ ${{ matrix.environment }} = "PT18+CUDA102" ] || \
            [ ${{ matrix.environment }} = "PT110+CUDA111" ] || \
            [ ${{ matrix.environment }} = "PT113+CUDA116" ]
          then
            PYVER=3.7 PYSFX=3 DISTUTILS=python3-distutils && \
            apt-get update && apt-get install -y --no-install-recommends \
              curl \
              pkg-config \
              python$PYVER \
              python$PYVER-dev \
              python$PYSFX-pip \
              $DISTUTILS \
              rsync \
              swig \
              unzip \
              zip \
              zlib1g-dev \
              libboost-locale-dev \
              libboost-program-options-dev \
              libboost-system-dev \
              libboost-thread-dev \
              libboost-test-dev \
              libgoogle-glog-dev \
              libjsoncpp-dev \
              cmake \
              git && \
            rm -rf /var/lib/apt/lists/* && \
            export PYTHONIOENCODING=utf-8 LC_ALL=C.UTF-8 && \
            rm -f /usr/bin/python && \
            rm -f /usr/bin/python`echo $PYVER | cut -c1-1` && \
            ln -s /usr/bin/python$PYVER /usr/bin/python && \
            ln -s /usr/bin/python$PYVER /usr/bin/python`echo $PYVER | cut -c1-1` && \
            curl -O https://bootstrap.pypa.io/get-pip.py && \
            python get-pip.py && \
            rm get-pip.py;
          fi
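      # cupy wheels are built per CUDA version; cupy-cuda114 matches the CUDA 11.4
      # runtime of the 21.10 NGC pytorch image used by the PT19+CUDA114DOCKER environment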
      - if: matrix.environment == 'PT19+CUDA114DOCKER'
        name: Optional Cupy dependency (cuda114)
        run: echo "cupy-cuda114" >> requirements-dev.txt
      - name: Install dependencies
        if: github.event.pull_request.merged != true
        run: |
          which python
          python -m pip install --upgrade pip wheel
          # fixes preinstalled ruamel_yaml error from the docker image
          rm -rf $(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")/ruamel*
          python -m pip install ${{ matrix.pytorch }}
          python -m pip install -r requirements-dev.txt
          python -m pip list
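          # expose the host driver's CUDA library inside the container: symlink libcuda.so.1
          # to the versioned driver library; `|| true` keeps the step going when the path is absent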
          DRIVER_VERSION=$(cat /proc/driver/nvidia/version | head -1 | awk -F' ' '{print $8}')
          ls -ltr /usr/lib/x86_64-linux-gnu/libcuda* && ln -fs /usr/lib/x86_64-linux-gnu/libcuda.so.$DRIVER_VERSION /usr/lib/x86_64-linux-gnu/libcuda.so.1 || true
          ls -ltr /usr/lib64/libcuda* && ln -fs /usr/lib64/libcuda.so.$DRIVER_VERSION /usr/lib64/libcuda.so.1 || true
      - name: Run quick tests (GPU)
        if: github.event.pull_request.merged != true
        run: |
          git clone --depth 1 \
            https://github.com/Project-MONAI/MONAI-extra-test-data.git /MONAI-extra-test-data
          export MONAI_EXTRA_TEST_DATA="/MONAI-extra-test-data"
          nvidia-smi
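          # stagger start-up by a random delay (0-290s in steps of 10) so concurrent jobs on
          # the shared self-hosted runner are less likely to probe the GPUs at the same moment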
          export LAUNCH_DELAY=$(python -c "import numpy; print(numpy.random.randint(30) * 10)")
          echo "Sleep $LAUNCH_DELAY"
          sleep $LAUNCH_DELAY
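          # `coverage run -m tests.utils` prints a device selection on its last output line
          # (the MONAI test utilities report suitable GPU ids); pin this run to those devices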
          export CUDA_VISIBLE_DEVICES=$(coverage run -m tests.utils | tail -n 1)
          echo $CUDA_VISIBLE_DEVICES
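          # on error, clean up any python processes that are still running; the background
          # loop below keeps a small tensor allocated on cuda:0 and cuda:1 (output discarded)
          # while the tests run, and is killed again at the end of the step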
          trap 'if pgrep python; then pkill python; fi;' ERR
          python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
          python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
          python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))'
          python -c "import monai; monai.config.print_config()"
          # build for the current self-hosted CI Tesla V100
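          # TORCH_CUDA_ARCH_LIST="7.0" targets the V100's compute capability, so the
          # C++/CUDA extensions are compiled for that architecture only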
          BUILD_MONAI=1 TORCH_CUDA_ARCH_LIST="7.0" ./runtests.sh --build --disttests
          ./runtests.sh --quick --unittests
          if [ ${{ matrix.environment }} = "PT18+CUDA102" ]; then
            # test the clang-format tool downloading once
            coverage run -m tests.clang_format_utils
          fi
          coverage xml --ignore-errors
          if pgrep python; then pkill python; fi
        shell: bash
      - name: Upload coverage
        if: ${{ github.head_ref != 'dev' && github.event.pull_request.merged != true }}
        uses: codecov/codecov-action@v3
        with:
          files: ./coverage.xml