Skip to content

[Draft] To get reasonable performance on the default Triton passes pipeline. #1089

[Draft] To get reasonable performance on the default Triton passes pipeline.

[Draft] To get reasonable performance on the default Triton passes pipeline. #1089

Workflow file for this run

name: Build and test
on:
workflow_dispatch:
pull_request:
branches:
- llvm-target
push:
branches:
- llvm-target
permissions: read-all
env:
TRITON_DISABLE_LINE_INFO: 1
jobs:
pre-commit:
name: Pre-commit checks
runs-on:
- glados
- spr
- cpu
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Load pip cache
id: pip-cache
uses: ./.github/actions/load
env:
# Increase this value to reset cache
CACHE_NUMBER: 1
with:
path: $HOME/.cache/pip
key: pip-3.9-${{ hashFiles('.pre-commit-config.yaml') }}-${{ env.CACHE_NUMBER }}
- name: Install Python 3.9
uses: actions/setup-python@v5
with:
python-version: '3.9'
- name: Run pre-commit checks
run: |
pip install --upgrade pre-commit
# TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
python3 -m pre_commit run --all-files --verbose yapf &> /dev/null || true
# If first run of yapf worked and made changes reset the tree to the original state
git reset --hard
python3 -m pre_commit run --show-diff-on-failure --color=always --all-files --verbose
- name: Save pip cache
if: ${{ steps.pip-cache.outputs.status == 'miss' }}
uses: ./.github/actions/save
with:
path: ${{ steps.pip-cache.outputs.path }}
dest: ${{ steps.pip-cache.outputs.dest }}
integration-tests:
name: Integration tests
runs-on:
- glados
- spr
- runner-0.0.12
strategy:
matrix:
python:
- "3.9"
- "3.10"
defaults:
run:
shell: bash -noprofile --norc -eo pipefail -c "source /home/runner/intel/oneapi/setvars.sh > /dev/null; source {0}"
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Load pip cache
id: pip-cache
uses: ./.github/actions/load
env:
# Increase this value to reset cache
CACHE_NUMBER: 1
with:
path: $HOME/.cache/pip
key: pip-${{ matrix.python }}-${{ hashFiles('python/pyproject.toml', 'python/setup.py') }}-${{ env.CACHE_NUMBER }}
- name: Install Python ${{ matrix.python }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
- name: Get LLVM commit id
run: |
LLVM_COMMIT_ID=$(<cmake/llvm-hash.txt)
echo "LLVM_COMMIT_ID=$LLVM_COMMIT_ID" >> $GITHUB_ENV
- name: Setup PyTorch
uses: ./.github/actions/setup-pytorch
- name: Setup IPEX
uses: ./.github/actions/setup-ipex
- name: Build Triton
run: |
export DEBUG=1
cd python
pip install wheel pytest pytest-xdist pytest-rerunfailures pytest-select
pip install --no-build-isolation '.[build,tests,tutorials]'
- name: Run lit tests
run: |
cd python
lit -v build/*/test
- name: Create directory for tests reports
run: |
mkdir ~/reports
echo "TRITON_TEST_REPORTS=true" >> $GITHUB_ENV
echo "TRITON_TEST_REPORTS_DIR=$HOME/reports" >> $GITHUB_ENV
- name: Run core tests
run: |
source ./scripts/pytest-utils.sh
cd python/test/unit
TRITON_TEST_SUITE=language \
pytest -vvv -n 8 --device xpu language/ --ignore=language/test_line_info.py --ignore=language/test_subprocess.py
TRITON_TEST_SUITE=subprocess \
pytest -vvv -n 8 language/test_subprocess.py
# Run runtime tests serially to avoid race condition with cache handling
TRITON_TEST_SUITE=runtime \
pytest -vvv --device xpu runtime/
# Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
TRITON_DISABLE_LINE_INFO=0 TRITON_TEST_SUITE=line_info \
pytest -vvv --device xpu language/test_line_info.py
- name: Clear cache
run: |
rm -rf ~/.triton
- name: Run interpreter tests
run: |
source ./scripts/pytest-utils.sh
cd python/test/unit
TRITON_INTERPRET=1 TRITON_TEST_SUITE=interpreter \
pytest -vvv -n 16 -m interpreter language/test_core.py language/test_standard.py \
language/test_random.py operators/test_flash_attention.py::test_op --device cpu
- name: Run partial operators tests
run: |
source ./scripts/pytest-utils.sh
cd python/test/unit
TRITON_TEST_SUITE=operators \
pytest -vvv -n 8 --device xpu operators
- name: Regression tests
run: |
source ./scripts/pytest-utils.sh
cd python/test/regression
TRITON_TEST_SUITE=regression \
python3 -m pytest --junitxml=~/reports/regression.xml -vvv -s --device xpu . --reruns 10 --ignore=test_performance.py
- name: Run XPU python tests
run: |
cd python/test/backend/third_party_backends
python3 -m pytest -n auto --verbose test_xpu_backend.py
- name: Run Tutorials
run: |
cd python/tutorials
python3 01-vector-add.py
python3 02-fused-softmax.py
python3 03-matrix-multiplication.py
python3 04-low-memory-dropout.py
python3 05-layer-norm.py
python3 06-fused-attention.py
python3 07-extern-functions.py
python3 08-grouped-gemm.py
python3 09-experimental-block-pointer.py
- name: Run CXX unittests
run: |
cd python/build/*cmake*
ctest
- name: Run E2E test
run: |
# Set WORKSPACE for inductor_xpu_test.sh to make sure it creates "inductor_log" outside of pytorch cloned directory
export WORKSPACE=$GITHUB_WORKSPACE
cd pytorch
TRANSFORMERS_VERSION="$(<.ci/docker/ci_commit_pins/huggingface.txt)"
pip install pyyaml pandas scipy numpy psutil pyre_extensions torchrec transformers==$TRANSFORMERS_VERSION
# TODO: Find the fastest Hugging Face model
$GITHUB_WORKSPACE/scripts/inductor_xpu_test.sh huggingface float32 inference accuracy xpu 0 static 1 0 AlbertForMaskedLM
# The script above always returns 0, so we need an additional check to see if the accuracy test passed
cat $WORKSPACE/inductor_log/*/*/*.csv
grep AlbertForMaskedLM $WORKSPACE/inductor_log/*/*/*.csv | grep -q ,pass,
- name: Save pip cache
if: ${{ steps.pip-cache.outputs.status == 'miss' }}
uses: ./.github/actions/save
with:
path: ${{ steps.pip-cache.outputs.path }}
dest: ${{ steps.pip-cache.outputs.dest }}
- name: Pass rate
run: |
python3 scripts/pass_rate.py --reports ~/reports
python3 scripts/pass_rate.py --reports ~/reports --json > pass_rate.json
- name: Upload pass rate report
# upload reports only for the default branch
if: github.ref_name == 'llvm-target'
uses: actions/upload-artifact@v4
with:
name: pass_rate-${{ join(matrix.*, '-') }}
path: pass_rate.json