# amd_tests.yml
# Repository is a fork of Dao-AILab/flash-attention.
# CI workflow that builds flash-attention in a ROCm PyTorch container and runs
# a subset of the flash-attention test-suite plus the AMD Triton kernel tests.
name: AMD Perf Kernel Tests

# Runs on manual dispatch, and on pull requests, merge-queue checks and pushes
# that target the main_perf branch.
on:
  workflow_dispatch:
  pull_request:
    branches: [main_perf]
  merge_group:
    branches: [main_perf]
    types: [checks_requested]
  push:
    branches: [main_perf]

# Only one run per git ref at a time; a newer run cancels the one in progress.
# NOTE(review): the group key is the bare ref, so any other workflow that uses
# the same group expression would share this queue - confirm that is intended.
concurrency:
  group: ${{ github.ref }}
  cancel-in-progress: true

permissions: read-all

jobs:
  # Chooses the runner labels for the test job and publishes them as the
  # `matrix-HIP` job output (a JSON list of label lists).
  Runner-Preparation-AMD:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    outputs:
      matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
    steps:
      - name: Prepare runner matrix
        id: set-matrix
        # Step outputs are appended to the $GITHUB_OUTPUT file as `name=value`;
        # the older `::set-output` workflow command is deprecated.
        run: |
          if [ x"${{ github.repository }}" == x"ROCm/flash-attention" ]; then
            # Upstream repository: use the self-hosted ROCm gfx942 runner.
            echo 'matrix-HIP=[["self-hosted", "rocm", "gfx942"]]' >> "$GITHUB_OUTPUT"
          else
            # Any other repository (e.g. a fork): fall back to a hosted runner.
            echo 'matrix-HIP=[["ubuntu-latest"]]' >> "$GITHUB_OUTPUT"
          fi

  # Builds the package and runs the tests on every runner selected above.
  Integration-Tests-AMD:
    needs: Runner-Preparation-AMD
    # Skip the job entirely when the preparation job produced no matrix.
    if: needs.Runner-Preparation-AMD.outputs.matrix-HIP != ''
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        # Each matrix entry is itself a list of runner labels.
        runner: ${{fromJson(needs.Runner-Preparation-AMD.outputs.matrix-HIP)}}
    container:
      image: rocm/pytorch:rocm6.0.2_ubuntu22.04_py3.10_pytorch_2.1.2
      # Passes /dev/kfd and /dev/dri into the container and runs as root.
      # NOTE(review): these devices are presumably absent on the hosted
      # ubuntu-latest fallback runner - confirm the fallback path is usable.
      options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # Removes any installed triton package and installs an editable build
      # from a fresh clone of triton-lang/triton, plus the Python test deps.
      # NOTE(review): the clone is not pinned to a commit or tag.
      - name: Install Triton
        run: |
          pip uninstall -y triton
          pip install matplotlib pandas pytest
          git clone https://github.com/triton-lang/triton
          cd triton
          pip install --verbose -e python
          cd ..
      - name: Build
        run: |
          python setup.py install
      # The qkvpacked, output and causal test steps below are currently
      # disabled (commented out).
      # - name: Flash Attention qkvpacked Tests
      #   run: |
      #     pytest tests/test_flash_attn.py::test_flash_attn_qkvpacked
      #     pytest tests/test_flash_attn.py::test_flash_attn_varlen_qkvpacked
      # - name: Flash Attention output Tests
      #   run: |
      #     pytest tests/test_flash_attn.py::test_flash_attn_output
      #     pytest tests/test_flash_attn.py::test_flash_attn_varlen_output
      # - name: Flash Attention causal Tests
      #   run: |
      #     pytest tests/test_flash_attn.py::test_flash_attn_causal
      #     pytest tests/test_flash_attn.py::test_flash_attn_varlen_causal
      - name: Flash Attention kvcache Tests
        run: |
          pytest tests/test_flash_attn.py::test_flash_attn_kvcache
          pytest tests/test_flash_attn.py::test_flash_attn_splitkv
      - name: Flash Attention race condition Tests
        run: |
          pytest tests/test_flash_attn.py::test_flash_attn_race_condition
      - name: Flash Attention bwd Tests
        run: |
          pytest tests/test_flash_attn.py::test_flash_attn_bwd_overflow
          pytest tests/test_flash_attn.py::test_flash_attn_bwd_transpose
          pytest tests/test_flash_attn.py::test_flash_attn_bwd_varlen_overflow
      - name: Flash Attention deterministic Tests
        run: |
          pytest tests/test_flash_attn.py::test_flash_attn_deterministic
          pytest tests/test_flash_attn.py::test_flash_attn_varlen_deterministic
      # Tests that live inside the AMD Triton kernel modules themselves.
      - name: AMD Kernel Tests
        run: |
          pytest flash_attn/flash_attn_triton_kernel_decode_amd.py::test_op_fwd
          pytest flash_attn/flash_attn_triton_kernel_prefill_amd.py