# test-gpu.yaml
# Workflow: on every push, install CUDA 12.2 and NCCL on the runner labelled
# `gpu`, then run the Rust tests matching `e2e` in release mode.
name: Rust GPU Tests

on:
  push:

# Runs that share a group cancel each other: a newer run for the same
# workflow + ref replaces the one still in progress.
concurrency:
  group: "${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}"
  cancel-in-progress: true

jobs:
  e2e:
    runs-on: gpu
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Only the CUDA toolkit directory is cached; NCCL comes from apt and is
      # installed by its own step below.
      - name: Cache CUDA and NCCL
        uses: actions/cache@v4
        id: cache-cuda-nccl
        with:
          path: /usr/local/cuda-12.2
          key: cuda-nccl-${{ runner.os }}-12.2
          restore-keys: |
            cuda-nccl-${{ runner.os }}-

      - name: Install apt dependencies
        run: sudo apt-get update && sudo apt-get install -y pkg-config libssl-dev

      # `cache-hit` is 'true' only for an exact key match, so a partial
      # restore through `restore-keys` still re-runs the installer.
      - name: Install CUDA toolkit
        if: steps.cache-cuda-nccl.outputs.cache-hit != 'true'
        shell: bash
        run: |
          wget -q https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run
          sudo sh cuda_12.2.0_535.54.03_linux.run --silent --toolkit
          /usr/local/cuda-12.2/bin/nvcc --version

      # Runs unconditionally: the NCCL packages are not part of the cached
      # path above, so a CUDA cache hit must not skip them.
      - name: Install NCCL
        env:
          DEBIAN_FRONTEND: noninteractive
        shell: bash
        run: |
          wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb
          sudo dpkg -i cuda-keyring_1.0-1_all.deb
          sudo apt-get update
          sudo apt-get install -y libnccl2 libnccl-dev

      # Makes CUDA visible to every later step. $GITHUB_PATH takes one
      # directory per line and $GITHUB_ENV takes NAME=value lines; shell
      # `export` statements written to those files are not interpreted.
      - name: Set up CUDA environment
        shell: bash
        run: |
          echo "/usr/local/cuda-12.2/lib64" | sudo tee /etc/ld.so.conf.d/cuda.conf
          sudo ldconfig
          echo "/usr/local/cuda-12.2/bin" >> "$GITHUB_PATH"
          # Append the previous value only when it is non-empty, so the
          # variable never ends with a stray ':'.
          echo "LD_LIBRARY_PATH=/usr/local/cuda-12.2/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"

      - name: Cache Rust build
        uses: actions/cache@v4
        id: cache-rust
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target
          key: rust-build-${{ runner.os }}-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: |
            rust-build-${{ runner.os }}-

      - name: Install Rust nightly
        uses: dtolnay/rust-toolchain@master
        with:
          toolchain: nightly

      - name: E2E Tests
        shell: bash
        env:
          NCCL_P2P_DIRECT_DISABLE: "1"
          NCCL_NET: Socket
        run: |
          # nvcc is resolved through the PATH entry added by
          # "Set up CUDA environment"; this fails fast if that export broke.
          nvcc --version
          cargo test --release e2e