---
# azure-regression-tests.yml
# Azure pipelines for Marian NMT
#
# The pipeline needs to be added manually to the repository, for example:
# 1. Go to Your repository > Pipelines, click "New pipeline"
# 2. Choose "Azure Repos Git" and a repository
# 3. Choose "Existing Azure Pipelines YAML file" and specify path to this file
# 4. "More actions" > "Save"
# The pipeline has no CI trigger and needs to be started manually, for example:
# 1. Go to the Pipeline created above
# 2. Click "Run pipeline" and select a "Branch/tag" you want to run it with
trigger: none

# Hosted Azure DevOps Pool determining OS, CUDA version and available GPUs
pool: mariandevops-pool-m60-eus

# Variable group defined in Pipelines > Library; presumably the source of
# $(marian-pub-tests-blob-sas-token) used below - confirm in the Library.
variables:
  - group: marian-regression-tests

stages:
  - stage: TestsGPU
    jobs:
      ######################################################################
      - job: TestsGPULinux
        cancelTimeoutInMinutes: 1
        displayName: Linux GPU tests
        timeoutInMinutes: 120

        steps:
          - checkout: self
            submodules: true

          # librt.* from the default anaconda environment are deleted because they crash the
          # linker at the end of compilation. This is an issue with the pre-defined VM image
          # that is used for the Pool and will not persist for other images
          - bash: |
              rm -f /anaconda/envs/py38_default/x86_64-conda-linux-gnu/sysroot/usr/lib/librt.*
              sudo apt-get install -y gcc-8 g++-8 p7zip-full
              # TODO: There should be no need to install python3
              sudo apt-get install -y python3.8 python3.8-dev python3.8-distutils python3.8-venv
              sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1
              sudo update-alternatives --set python3 /usr/bin/python3.8
              sudo apt-get install -y python3-pip
              python3 -m pip install --upgrade Cython pip
            displayName: Clean and install packages

          # Collect details about CPU and GPU.
          # Because the output goes into regression-tests/*.log files, it will be included
          # in the artifact.
          - bash: |
              echo ">>> lscpu"
              lscpu | tee lscpu.log
              echo ">>> cpuinfo"
              cat /proc/cpuinfo | tee cpuinfo.log
              /usr/bin/gcc-8 --version | tee gcc.log
              echo ">>> nvidia-smi"
              nvidia-smi | tee nvidia-smi.log
              echo ">>> python"
              which python3 | tee python.log
              python3 --version | tee -a python.log
              python3 -m pip --version | tee -a python.log
              echo ">>> df"
              df -h | tee df.log
            displayName: Collect system info
            workingDirectory: regression-tests

          # Always run regression tests from the master branch
          # The current SAS token will expire on 12/31/2023 and a new one will need to be set
          # in Marian > Pipelines > Library
          # This is run at the beginning for easier debugging of the Python environment
          - bash: |
              set -x
              git checkout master
              git pull origin master
              # Uninstall Cython because the newest 3.0.0 is incompatible with newest available
              # versions of pyyaml and numpy as of July 2023
              python3 -m pip uninstall -y cython
              python3 -m pip install 'cython<3'
              # These modules will be installed via `make install` below, but Cython needs to be installed before
              python3 -m pip install 'pyyaml<6.0.1' 'numpy>=1.22,<2' websocket-client
              make install
            displayName: Prepare regression tests
            env:
              AZURE_STORAGE_SAS_TOKEN: $(marian-pub-tests-blob-sas-token)
            workingDirectory: regression-tests

          # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
          - bash: |
              sudo mkdir -p /usr/share/keyrings
              wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
                | gpg --dearmor \
                | sudo tee /usr/share/keyrings/intel.gpg > /dev/null
              echo "deb [signed-by=/usr/share/keyrings/intel.gpg] https://apt.repos.intel.com/mkl all main" \
                | sudo tee /etc/apt/sources.list.d/intel-mkl.list
              sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list"
              sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088
            displayName: Install MKL

          # Only the Maxwell GPU architecture is compiled; all other architectures are
          # switched off.
          - bash: |
              mkdir -p build
              cd build
              CC=/usr/bin/gcc-8 CXX=/usr/bin/g++-8 CUDAHOSTCXX=/usr/bin/g++-8 \
              cmake .. \
                -DCOMPILE_CPU=on \
                -DUSE_FBGEMM=on \
                -DCOMPILE_CUDA=on \
                -DDETERMINISTIC=on \
                -DUSE_STATIC_LIBS=on \
                -DCOMPILE_EXAMPLES=on \
                -DCOMPILE_SERVER=on \
                -DCOMPILE_TESTS=on \
                -DCOMPILE_MAXWELL=on \
                -DCOMPILE_PASCAL=off \
                -DCOMPILE_VOLTA=off \
                -DCOMPILE_TURING=off \
                -DCOMPILE_AMPERE=off \
                -DCOMPILE_AMPERE_RTX=off \
                -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-11.1 \
                -DPYMARIAN=on \
                -DUSE_TCMALLOC=off \
                -DPYTHON_EXECUTABLE=python3
            displayName: Configure CMake

          - bash: make -j5
            displayName: Compile
            workingDirectory: build

          - bash: |
              ./marian --version
              ./marian-decoder --version
              ./marian-scorer --version
              ./spm_encode --version
            displayName: Print versions
            workingDirectory: build

          # Run unit tests with verbose output
          - bash: ctest --verbose --output-on-failure
            displayName: Run unit tests
            workingDirectory: build

          # Continue on error to be able to collect outputs and publish them as an artifact
          - bash: MARIAN=../build ./run_mrt.sh
            continueOnError: true
            displayName: Run regression tests
            workingDirectory: regression-tests

          # The zip archive is written one level above regression-tests, which is where the
          # publish step below looks for it.
          - bash: |
              nvidia-smi
              # cut -c3- removes './' from paths so that 7z retains the directory structure
              find . -type f \( -name "*.log" -o -name "*.out" -o -name "*.diff" \) -print \
                | cut -c3- > listing.txt
              echo "Creating an artifact with the following files:"
              cat listing.txt
              7z a -tzip ../regression-tests-ci-public_linux-x64-static_cuda_m60.zip @listing.txt
            displayName: Collect outputs
            workingDirectory: regression-tests

          # Publish right after collecting so that a failure in the Pymarian step below
          # cannot prevent the regression test outputs from being uploaded.
          - publish: regression-tests-ci-public_linux-x64-static_cuda_m60.zip
            artifact: regression-tests-ci-public_linux-x64-static_cuda_m60
            displayName: Publish outputs

          # Install the pymarian wheel found in build/ and run its Python regression tests
          - bash: |
              python3 -m pip install build/pymarian-*.whl
              python3 -m pymarian -v
              python3 -m pip install pytest
              python3 -m pytest src/python/tests/regression
            displayName: Pymarian Install and Test