Profile Intel GPU kernels with the SYCL event profiling timestamp instead of barrier time diff #1584

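The change described in the title times Intel GPU kernels by reading the SYCL event profiling timestamps recorded for each kernel, instead of taking the time difference between surrounding barriers. As context only, here is a minimal standalone C++ sketch of that mechanism; it is not code from this PR, and the queue, kernel, and buffer names are illustrative:

#include <sycl/sycl.hpp>
#include <cstdint>
#include <iostream>

int main() {
  // The queue must be created with the profiling property, otherwise
  // profiling info is not recorded for its events.
  sycl::queue q{sycl::gpu_selector_v,
                sycl::property_list{sycl::property::queue::enable_profiling{}}};

  constexpr size_t n = 1 << 20;
  float *data = sycl::malloc_device<float>(n, q);

  // Keep the event returned by the kernel submission.
  sycl::event evt = q.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> i) {
    data[i] = static_cast<float>(i) * 2.0f;
  });
  evt.wait();

  // Device timestamps (nanoseconds) recorded for this command, queried
  // directly from the kernel's own event rather than diffing barrier times.
  const uint64_t start = evt.get_profiling_info<sycl::info::event_profiling::command_start>();
  const uint64_t end = evt.get_profiling_info<sycl::info::event_profiling::command_end>();
  std::cout << "kernel time: " << (end - start) * 1e-6 << " ms\n";

  sycl::free(data, q);
  return 0;
}

Querying command_start/command_end on the event avoids inserting extra barriers purely for timing, which is the motivation stated in the title.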

Workflow file for this run

name: Build and test
on:
  workflow_dispatch:
    inputs:
      runner_label:
        description: Runner label, keep empty for default
        type: string
        default: ""
      upload_test_reports:
        description: Upload test reports
        type: boolean
        default: false
  pull_request:
    branches:
      - llvm-target
  push:
    branches:
      - llvm-target
permissions: read-all
env:
  TRITON_DISABLE_LINE_INFO: 1
jobs:
  pre-commit:
    name: Pre-commit checks
    runs-on:
      - glados
      - spr
      - cpu
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Load pip cache
        id: pip-cache
        uses: ./.github/actions/load
        env:
          # Increase this value to reset cache
          CACHE_NUMBER: 1
        with:
          path: $HOME/.cache/pip
          key: pip-3.9-${{ hashFiles('.pre-commit-config.yaml') }}-${{ env.CACHE_NUMBER }}
      - name: Install Python 3.9
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'
      - name: Run pre-commit checks
        run: |
          pip install --upgrade pre-commit
          # TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
          python3 -m pre_commit run --all-files --verbose yapf &> /dev/null || true
          # If the first yapf run succeeded and made changes, reset the tree to its original state
          git reset --hard
          python3 -m pre_commit run --show-diff-on-failure --color=always --all-files --verbose
      - name: Save pip cache
        if: ${{ steps.pip-cache.outputs.status == 'miss' }}
        uses: ./.github/actions/save
        with:
          path: ${{ steps.pip-cache.outputs.path }}
          dest: ${{ steps.pip-cache.outputs.dest }}
  integration-tests:
    name: Integration tests
    runs-on:
      - ${{ inputs.runner_label || 'runner-0.0.13' }}
    strategy:
      matrix:
        python: ${{ github.ref_name == 'llvm-target' && fromJson('["3.9", "3.10", "3.11"]') || fromJson('["3.9"]') }}
    defaults:
      run:
        shell: bash -noprofile --norc -eo pipefail -c "source /home/runner/intel/oneapi/setvars.sh > /dev/null; source {0}"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Load pip cache
        id: pip-cache
        uses: ./.github/actions/load
        env:
          # Increase this value to reset cache
          CACHE_NUMBER: 1
        with:
          path: $HOME/.cache/pip
          key: pip-${{ matrix.python }}-${{ hashFiles('python/pyproject.toml', 'python/setup.py') }}-${{ env.CACHE_NUMBER }}
      - name: Install Python ${{ matrix.python }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python }}
      - name: Get LLVM commit id
        run: |
          LLVM_COMMIT_ID=$(<cmake/llvm-hash.txt)
          echo "LLVM_COMMIT_ID=$LLVM_COMMIT_ID" >> $GITHUB_ENV
      - name: Setup PyTorch
        uses: ./.github/actions/setup-pytorch
      - name: Setup IPEX
        uses: ./.github/actions/setup-ipex
      - name: Build Triton
        run: |
          export DEBUG=1
          cd python
          pip install wheel pytest pytest-xdist pytest-rerunfailures pytest-select
          pip install --no-build-isolation '.[build,tests,tutorials]'
          pip install git+https://github.com/kwasd/pytest-capturewarnings-ng.git@v1.1
      - name: Run lit tests
        run: |
          cd python
          lit -v build/*/test
      - name: Create directory for test reports
        run: |
          mkdir reports
          echo "TRITON_TEST_REPORTS=true" >> $GITHUB_ENV
          echo "TRITON_TEST_WARNING_REPORTS=true" >> $GITHUB_ENV
          echo "TRITON_TEST_REPORTS_DIR=$GITHUB_WORKSPACE/reports" >> $GITHUB_ENV
      - name: Run core tests
        run: |
          source ./scripts/pytest-utils.sh
          cd python/test/unit
          TRITON_TEST_SUITE=language \
            pytest -vvv -n 8 --device xpu language/ --ignore=language/test_line_info.py --ignore=language/test_subprocess.py
          TRITON_TEST_SUITE=subprocess \
            pytest -vvv -n 8 language/test_subprocess.py
          # Run runtime tests serially to avoid a race condition in cache handling
          TRITON_TEST_SUITE=runtime \
            pytest -vvv --device xpu runtime/
          # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
          TRITON_DISABLE_LINE_INFO=0 TRITON_TEST_SUITE=line_info \
            pytest -vvv --device xpu language/test_line_info.py
      - name: Clear cache
        run: |
          rm -rf ~/.triton
      - name: Run interpreter tests
        run: |
          source ./scripts/pytest-utils.sh
          cd python/test/unit
          TRITON_INTERPRET=1 TRITON_TEST_SUITE=interpreter \
            pytest -vvv -n 16 -m interpreter language/test_core.py language/test_standard.py \
            language/test_random.py operators/test_flash_attention.py::test_op --device cpu
      - name: Run partial operators tests
        run: |
          source ./scripts/pytest-utils.sh
          cd python/test/unit
          TRITON_TEST_SUITE=operators \
            pytest -vvv -n 8 --device xpu operators
      - name: Regression tests
        run: |
          source ./scripts/pytest-utils.sh
          cd python/test/regression
          TRITON_TEST_SUITE=regression \
            pytest -vvv -s --device xpu . --reruns 10 --ignore=test_performance.py
      - name: Run XPU python tests
        run: |
          cd python/test/backend/third_party_backends
          python3 -m pytest -n auto --verbose test_xpu_backend.py
      - name: Run Tutorials
        run: |
          cd python/tutorials
          python3 01-vector-add.py
          python3 02-fused-softmax.py
          python3 03-matrix-multiplication.py
          python3 04-low-memory-dropout.py
          python3 05-layer-norm.py
          python3 06-fused-attention.py
          python3 07-extern-functions.py
          python3 08-grouped-gemm.py
          python3 09-experimental-block-pointer.py
          TRITON_INTEL_ENABLE_BLOCK_PTR=1 python3 09-experimental-block-pointer.py
      - name: Run CXX unittests
        run: |
          cd python/build/*cmake*
          ctest
      - name: Run E2E test
        run: |
          # Set WORKSPACE for inductor_xpu_test.sh to make sure it creates "inductor_log" outside of the cloned pytorch directory
          export WORKSPACE=$GITHUB_WORKSPACE
          cd pytorch
          TRANSFORMERS_VERSION="$(<.ci/docker/ci_commit_pins/huggingface.txt)"
          pip install pyyaml pandas scipy numpy psutil pyre_extensions torchrec transformers==$TRANSFORMERS_VERSION
          # TODO: Find the fastest Hugging Face model
          $GITHUB_WORKSPACE/scripts/inductor_xpu_test.sh huggingface float32 inference accuracy xpu 0 static 1 0 AlbertForMaskedLM
          # The script above always returns 0, so we need an additional check to see if the accuracy test passed
          cat $WORKSPACE/inductor_log/*/*/*.csv
          grep AlbertForMaskedLM $WORKSPACE/inductor_log/*/*/*.csv | grep -q ,pass,
      - name: Save pip cache
        if: ${{ steps.pip-cache.outputs.status == 'miss' }}
        uses: ./.github/actions/save
        with:
          path: ${{ steps.pip-cache.outputs.path }}
          dest: ${{ steps.pip-cache.outputs.dest }}
      - name: Pass rate
        run: |
          python3 scripts/pass_rate.py --reports reports
          python3 scripts/pass_rate.py --reports reports --json > pass_rate.json
      - name: Upload pass rate report
        # upload reports only for the default branch
        if: github.ref_name == 'llvm-target'
        uses: actions/upload-artifact@v4
        with:
          name: pass_rate-${{ join(matrix.*, '-') }}
          path: pass_rate.json
      - name: Upload test reports
        if: inputs.upload_test_reports
        uses: actions/upload-artifact@v4
        with:
          name: test-reports-${{ join(matrix.*, '-') }}
          path: reports