[Intel]: add loop pipelining for prefetch in the pass pipeline #1682

Workflow file for this run

.github/workflows/build-test.yml at 5833762

	# CI workflow: runs pre-commit checks, then builds the project and runs its
	# lit, pytest, tutorial, CXX and E2E test suites.
	name: Build and test
	run-name: ${{ inputs.run_name }}

	# Triggers: manual dispatch (with the inputs below) and pushes / pull requests
	# filtered on the llvm-target branch.
	on:
	  workflow_dispatch:
	    inputs:
	      runner_label:
	        description: Runner label, keep empty for default
	        type: string
	        default: ""
	      upload_test_reports:
	        description: Upload test reports
	        type: boolean
	        default: false
	      ignore_errors:
	        description: Ignore test errors
	        type: boolean
	        default: false
	      run_name:
	        description: Custom run name
	        type: string
	        default: "Build and test"
	  pull_request:
	    branches:
	      - llvm-target
	  push:
	    branches:
	      - llvm-target

	permissions: read-all

	# Line info is disabled for every step; the line_info test suite below
	# re-enables it for its own pytest invocation.
	env:
	  TRITON_DISABLE_LINE_INFO: "1"

	jobs:
	  # Formatting / lint gate driven by .pre-commit-config.yaml.
	  pre-commit:
	    name: Pre-commit checks
	    runs-on:
	      - glados
	      - spr
	      - cpu
	    steps:
	      - name: Print inputs
	        # The JSON is handed to the script through an environment variable
	        # rather than expanded inside the shell string: the expanded text
	        # contains double quotes that would terminate the string early and
	        # would make the script text depend on user-supplied input.
	        env:
	          INPUTS_JSON: ${{ toJSON(github.event.inputs) }}
	        run: |
	          echo "$INPUTS_JSON"

	      - name: Checkout repository
	        uses: actions/checkout@v4

	      - name: Load pip cache
	        id: pip-cache
	        uses: ./.github/actions/load
	        env:
	          # Increase this value to reset cache
	          CACHE_NUMBER: "1"
	        with:
	          path: $HOME/.cache/pip
	          key: pip-3.9-${{ hashFiles('.pre-commit-config.yaml') }}-${{ env.CACHE_NUMBER }}

	      - name: Install Python 3.9
	        uses: actions/setup-python@v5
	        with:
	          python-version: '3.9'

	      - name: Run pre-commit checks
	        run: |
	          pip install --upgrade pre-commit

	          # TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
	          python3 -m pre_commit run --all-files --verbose yapf &> /dev/null || true
	          # If first run of yapf worked and made changes reset the tree to the original state
	          git reset --hard

	          python3 -m pre_commit run --show-diff-on-failure --color=always --all-files --verbose

	      # Only written back when the load step reported a cache miss.
	      - name: Save pip cache
	        if: ${{ steps.pip-cache.outputs.status == 'miss' }}
	        uses: ./.github/actions/save
	        with:
	          path: ${{ steps.pip-cache.outputs.path }}
	          dest: ${{ steps.pip-cache.outputs.dest }}

	  # Builds Triton and runs the test suites on the selected runner.
	  integration-tests:
	    name: Integration tests
	    runs-on:
	      # Falls back to the default runner label when the dispatch input is empty.
	      - ${{ inputs.runner_label || 'runner-0.0.13' }}
	    strategy:
	      matrix:
	        # Three Python versions on the llvm-target ref, only 3.9 otherwise.
	        python: ${{ github.ref_name == 'llvm-target' && fromJson('["3.9", "3.10", "3.11"]') || fromJson('["3.9"]') }}
	    defaults:
	      run:
	        # Every run step sources the oneAPI environment before the step script.
	        shell: bash -noprofile --norc -eo pipefail -c "source /home/runner/intel/oneapi/setvars.sh > /dev/null; source {0}"
	    steps:
	      - name: Checkout repository
	        uses: actions/checkout@v4

	      - name: Load pip cache
	        id: pip-cache
	        uses: ./.github/actions/load
	        env:
	          # Increase this value to reset cache
	          CACHE_NUMBER: "1"
	        with:
	          path: $HOME/.cache/pip
	          key: pip-${{ matrix.python }}-${{ hashFiles('python/pyproject.toml', 'python/setup.py') }}-${{ env.CACHE_NUMBER }}

	      - name: Install Python ${{ matrix.python }}
	        uses: actions/setup-python@v5
	        with:
	          python-version: ${{ matrix.python }}

	      # Exports the pinned LLVM commit from cmake/llvm-hash.txt to later steps.
	      - name: Get LLVM commit id
	        run: |
	          LLVM_COMMIT_ID=$(<cmake/llvm-hash.txt)
	          echo "LLVM_COMMIT_ID=$LLVM_COMMIT_ID" >> $GITHUB_ENV

	      - name: Setup PyTorch
	        uses: ./.github/actions/setup-pytorch

	      - name: Setup IPEX
	        uses: ./.github/actions/setup-ipex

	      - name: Build Triton
	        run: |
	          export DEBUG=1
	          cd python
	          pip install wheel pytest pytest-xdist pytest-rerunfailures pytest-select
	          pip install --no-build-isolation '.[build,tests,tutorials]'
	          pip install git+https://github.com/kwasd/pytest-capturewarnings-ng.git@v1.1

	      - name: Run lit tests
	        run: |
	          cd python
	          lit -v build/*/test

	      # Creates the reports directory and exports the TRITON_TEST_* report
	      # variables for the following steps.
	      - name: Create directory for tests reports
	        run: |
	          mkdir reports
	          echo "TRITON_TEST_REPORTS=true" >> $GITHUB_ENV
	          echo "TRITON_TEST_WARNING_REPORTS=true" >> $GITHUB_ENV
	          echo "TRITON_TEST_REPORTS_DIR=$GITHUB_WORKSPACE/reports" >> $GITHUB_ENV

	      - name: Enable ignoring test errors
	        if: inputs.ignore_errors
	        run: |
	          echo "TRITON_TEST_IGNORE_ERRORS=true" >> $GITHUB_ENV

	      - name: Run core tests
	        run: |
	          source ./scripts/pytest-utils.sh
	          cd python/test/unit
	          TRITON_TEST_SUITE=language \
	            pytest -vvv -n 8 --device xpu language/ --ignore=language/test_line_info.py --ignore=language/test_subprocess.py
	          TRITON_TEST_SUITE=subprocess \
	            pytest -vvv -n 8 language/test_subprocess.py
	          # Run runtime tests serially to avoid race condition with cache handling
	          TRITON_TEST_SUITE=runtime \
	            pytest -vvv --device xpu runtime/
	          # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
	          TRITON_DISABLE_LINE_INFO=0 TRITON_TEST_SUITE=line_info \
	            pytest -vvv --device xpu language/test_line_info.py

	      - name: Clear cache
	        run: |
	          rm -rf ~/.triton

	      - name: Run interpreter tests
	        run: |
	          source ./scripts/pytest-utils.sh
	          cd python/test/unit
	          TRITON_INTERPRET=1 TRITON_TEST_SUITE=interpreter \
	            pytest -vvv -n 16 -m interpreter language/test_core.py language/test_standard.py \
	            language/test_random.py operators/test_flash_attention.py::test_op --device cpu

	      - name: Run partial operators tests
	        run: |
	          source ./scripts/pytest-utils.sh
	          cd python/test/unit
	          TRITON_TEST_SUITE=operators \
	            pytest -vvv -n 8 --device xpu operators

	      - name: Regression tests
	        run: |
	          source ./scripts/pytest-utils.sh
	          cd python/test/regression
	          TRITON_TEST_SUITE=regression \
	            pytest -vvv -s --device xpu . --reruns 10 --ignore=test_performance.py

	      - name: Run XPU python tests
	        run: |
	          cd python/test/backend/third_party_backends
	          python3 -m pytest -n auto --verbose test_xpu_backend.py

	      # Tutorial 09 is run twice: once as-is and once with
	      # TRITON_INTEL_ENABLE_BLOCK_PTR=1.
	      - name: Run Tutorials
	        run: |
	          source ./scripts/pytest-utils.sh
	          cd python/tutorials
	          run_tutorial_test "01-vector-add"
	          run_tutorial_test "02-fused-softmax"
	          run_tutorial_test "03-matrix-multiplication"
	          run_tutorial_test "04-low-memory-dropout"
	          run_tutorial_test "05-layer-norm"
	          run_tutorial_test "06-fused-attention"
	          run_tutorial_test "07-extern-functions"
	          run_tutorial_test "08-grouped-gemm"
	          run_tutorial_test "09-experimental-block-pointer"
	          TRITON_INTEL_ENABLE_BLOCK_PTR=1 run_tutorial_test "09-experimental-block-pointer"

	      - name: Run CXX unittests
	        run: |
	          cd python/build/cmake
	          ctest

	      # NOTE(review): the CSV glob was restored to inductor_log/*/*/*.csv. The
	      # text under review read inductor_log///*.csv, which looks like paired
	      # '*' characters lost to markup rendering - confirm against the directory
	      # layout produced by scripts/inductor_xpu_test.sh.
	      - name: Run E2E test
	        run: |
	          # Set WORKSPACE for inductor_xpu_test.sh to make sure it creates "inductor_log" outside of pytorch cloned directory
	          export WORKSPACE=$GITHUB_WORKSPACE
	          cd pytorch
	          TRANSFORMERS_VERSION="$(<.ci/docker/ci_commit_pins/huggingface.txt)"
	          pip install pyyaml pandas scipy numpy psutil pyre_extensions torchrec transformers==$TRANSFORMERS_VERSION
	          # TODO: Find the fastest Hugging Face model
	          $GITHUB_WORKSPACE/scripts/inductor_xpu_test.sh huggingface float32 inference accuracy xpu 0 static 1 0 AlbertForMaskedLM
	          # The script above always returns 0, so we need an additional check to see if the accuracy test passed
	          cat $WORKSPACE/inductor_log/*/*/*.csv
	          grep AlbertForMaskedLM $WORKSPACE/inductor_log/*/*/*.csv | grep -q ,pass,

	      # Only written back when the load step reported a cache miss.
	      - name: Save pip cache
	        if: ${{ steps.pip-cache.outputs.status == 'miss' }}
	        uses: ./.github/actions/save
	        with:
	          path: ${{ steps.pip-cache.outputs.path }}
	          dest: ${{ steps.pip-cache.outputs.dest }}

	      # Prints the pass rate to the log, then writes it as JSON for upload.
	      - name: Pass rate
	        run: |
	          python3 scripts/pass_rate.py --reports reports
	          python3 scripts/pass_rate.py --reports reports --json > pass_rate.json

	      - name: Upload pass rate report
	        # upload reports only for the default branch
	        if: github.ref_name == 'llvm-target'
	        uses: actions/upload-artifact@v4
	        with:
	          name: pass_rate-${{ join(matrix.*, '-') }}
	          path: pass_rate.json

	      - name: Upload test reports
	        if: inputs.upload_test_reports
	        uses: actions/upload-artifact@v4
	        with:
	          name: test-reports-${{ join(matrix.*, '-') }}
	          path: reports

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[Intel]: add loop pipelining for prefetch in the pass pipeline #1682

Workflow file

[Intel]: add loop pipelining for prefetch in the pass pipeline #1682

Jobs

Run details

Workflow file for this run