# pythonapp-gpu.yml (forked from Project-MONAI/MONAI)
# Jenkinsfile.monai-premerge
name: premerge-gpu

on:
  # quick tests for pull requests and the releasing branches
  push:
    branches:
      - main
      - releasing/*
  pull_request:
    types: [opened, synchronize, closed]

concurrency:
  # automatically cancel the previously triggered workflows when there's a newer version
  group: build-gpu-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
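  # the group key is the PR number for pull_request events and falls back to the
  # branch ref for push events, so each PR (or branch) only keeps its latest run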

jobs:
  GPU-quick-py3:  # GPU with full dependencies
    if: ${{ github.repository == 'Project-MONAI/MONAI' && github.event.pull_request.merged != true }}
    strategy:
      matrix:
        environment:
          - "PT18+CUDA102"
          - "PT19+CUDA114DOCKER"
          - "PT110+CUDA111"
          - "PT112+CUDA118DOCKER"
          - "PT113+CUDA116"
        include:
          # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes
          - environment: PT18+CUDA102
            # pytorch 1.8.2 LTS
            pytorch: "torch==1.8.2 torchvision==0.9.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cu102"
            base: "nvcr.io/nvidia/cuda:10.2-devel-ubuntu18.04"
          - environment: PT19+CUDA114DOCKER
            # 21.10: 1.10.0a0+0aef44c
            pytorch: "-h"  # we explicitly set pytorch to -h to avoid pip install error
            base: "nvcr.io/nvidia/pytorch:21.10-py3"
          - environment: PT110+CUDA111
            pytorch: "torch==1.10.2 torchvision==0.11.3 --extra-index-url https://download.pytorch.org/whl/cu111"
            base: "nvcr.io/nvidia/cuda:11.1.1-devel-ubuntu18.04"
          - environment: PT112+CUDA118DOCKER
            # 22.09: 1.13.0a0+d0d6b1f
            pytorch: "-h"  # we explicitly set pytorch to -h to avoid pip install error
            base: "nvcr.io/nvidia/pytorch:22.09-py3"
          - environment: PT113+CUDA116
            pytorch: "torch==1.13.1 torchvision==0.14.1"
            base: "nvcr.io/nvidia/cuda:11.6.1-devel-ubuntu18.04"
    container:
      image: ${{ matrix.base }}
      options: --gpus all --env NVIDIA_DISABLE_REQUIRE=true  # workaround for unsatisfied condition: cuda>=11.6
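      # NVIDIA_DISABLE_REQUIRE turns off the container runtime's "cuda>=X" driver
      # requirement check, so images built against newer CUDA still start on the
      # runner's existing driver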
    runs-on: [self-hosted, linux, x64, common]
    steps:
      - uses: actions/checkout@v3
      - name: apt install
        if: github.event.pull_request.merged != true
        run: |
          # FIXME: workaround for https://github.com/Project-MONAI/MONAI/issues/4200
          apt-key del 7fa2af80 && rm -rf /etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/cuda.list
          apt-get update
          apt-get install -y wget
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb
          dpkg -i cuda-keyring_1.0-1_all.deb
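          # cuda-keyring installs NVIDIA's rotated repository signing key, replacing
          # the revoked 7fa2af80 key deleted above
          # the plain CUDA "devel" base images do not ship the Python/pip toolchain used
          # below, so set up Python 3.7 there; the NGC pytorch images already include one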
          if [ ${{ matrix.environment }} = "PT18+CUDA102" ] || \
            [ ${{ matrix.environment }} = "PT110+CUDA111" ] || \
            [ ${{ matrix.environment }} = "PT113+CUDA116" ]
          then
            PYVER=3.7 PYSFX=3 DISTUTILS=python3-distutils && \
            apt-get update && apt-get install -y --no-install-recommends \
              curl \
              pkg-config \
              python$PYVER \
              python$PYVER-dev \
              python$PYSFX-pip \
              $DISTUTILS \
              rsync \
              swig \
              unzip \
              zip \
              zlib1g-dev \
              libboost-locale-dev \
              libboost-program-options-dev \
              libboost-system-dev \
              libboost-thread-dev \
              libboost-test-dev \
              libgoogle-glog-dev \
              libjsoncpp-dev \
              cmake \
              git && \
            rm -rf /var/lib/apt/lists/* && \
            export PYTHONIOENCODING=utf-8 LC_ALL=C.UTF-8 && \
            rm -f /usr/bin/python && \
            rm -f /usr/bin/python`echo $PYVER | cut -c1-1` && \
            ln -s /usr/bin/python$PYVER /usr/bin/python && \
            ln -s /usr/bin/python$PYVER /usr/bin/python`echo $PYVER | cut -c1-1` && \
            curl -O https://bootstrap.pypa.io/get-pip.py && \
            python get-pip.py && \
            rm get-pip.py;
          fi
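      # cupy wheels are built per CUDA version; cupy-cuda114 matches the CUDA 11.4
      # runtime of the 21.10 NGC pytorch image used by the PT19+CUDA114DOCKER environment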
      - if: matrix.environment == 'PT19+CUDA114DOCKER'
        name: Optional Cupy dependency (cuda114)
        run: echo "cupy-cuda114" >> requirements-dev.txt
      - name: Install dependencies
        if: github.event.pull_request.merged != true
        run: |
          which python
          python -m pip install --upgrade pip wheel
          # fixes preinstalled ruamel_yaml error from the docker image
          rm -rf $(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")/ruamel*
          python -m pip install ${{ matrix.pytorch }}
          python -m pip install -r requirements-dev.txt
          python -m pip list
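          # expose the host driver's CUDA library inside the container: symlink libcuda.so.1
          # to the versioned driver library; `|| true` keeps the step going when the path is absent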
          DRIVER_VERSION=$(cat /proc/driver/nvidia/version | head -1 | awk -F' ' '{print $8}')
          ls -ltr /usr/lib/x86_64-linux-gnu/libcuda* && ln -fs /usr/lib/x86_64-linux-gnu/libcuda.so.$DRIVER_VERSION /usr/lib/x86_64-linux-gnu/libcuda.so.1 || true
          ls -ltr /usr/lib64/libcuda* && ln -fs /usr/lib64/libcuda.so.$DRIVER_VERSION /usr/lib64/libcuda.so.1 || true
      - name: Run quick tests (GPU)
        if: github.event.pull_request.merged != true
        run: |
          git clone --depth 1 \
            https://github.com/Project-MONAI/MONAI-extra-test-data.git /MONAI-extra-test-data
          export MONAI_EXTRA_TEST_DATA="/MONAI-extra-test-data"
          nvidia-smi
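          # stagger start-up by a random delay (0-290s in steps of 10) so concurrent jobs on
          # the shared self-hosted runner are less likely to probe the GPUs at the same moment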
          export LAUNCH_DELAY=$(python -c "import numpy; print(numpy.random.randint(30) * 10)")
          echo "Sleep $LAUNCH_DELAY"
          sleep $LAUNCH_DELAY
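          # `coverage run -m tests.utils` prints a device selection on its last output line
          # (the MONAI test utilities report suitable GPU ids); pin this run to those devices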
          export CUDA_VISIBLE_DEVICES=$(coverage run -m tests.utils | tail -n 1)
          echo $CUDA_VISIBLE_DEVICES
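          # on error, clean up any python processes that are still running; the background
          # loop below keeps a small tensor allocated on cuda:0 and cuda:1 (output discarded)
          # while the tests run, and is killed again at the end of the step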
          trap 'if pgrep python; then pkill python; fi;' ERR
          python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
          python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
          python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))'
          python -c "import monai; monai.config.print_config()"
          # build for the current self-hosted CI Tesla V100
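          # TORCH_CUDA_ARCH_LIST="7.0" targets the V100's compute capability, so the
          # C++/CUDA extensions are compiled for that architecture only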
          BUILD_MONAI=1 TORCH_CUDA_ARCH_LIST="7.0" ./runtests.sh --build --disttests
          ./runtests.sh --quick --unittests
          if [ ${{ matrix.environment }} = "PT18+CUDA102" ]; then
            # test the clang-format tool downloading once
            coverage run -m tests.clang_format_utils
          fi
          coverage xml --ignore-errors
          if pgrep python; then pkill python; fi
        shell: bash
      - name: Upload coverage
        if: ${{ github.head_ref != 'dev' && github.event.pull_request.merged != true }}
        uses: codecov/codecov-action@v3
        with:
          files: ./coverage.xml