Skip to content

Commit

Permalink
Benchmark improvements (#2415)
Browse files Browse the repository at this point in the history
I've picked up some useful changes from
#1905 and
pushed them here. I also reorganized the Python library build.

So basically this is a cleanup with some features added, to get it ready
for the 2025 release.

List of changes:

- Add benchmark project artifacts to gitignore
- Compile python library using dedicated cmake api:
https://cmake.org/cmake/help/latest/module/FindPython3.html
- Suppress old style warning of xetla library
- Prevent the cmake build from running on clean-up commands (previously it ran
unconditionally)
- If ipex is not installed, the library is compiled in no-ipex mode with
a user warning
- More modular setup.py
- Verbose output of cmake commands being run
- CMakeLists.txt cleanup
- Fix shadow import usage of cmake library

Closes #1905
  • Loading branch information
ZzEeKkAa authored Oct 16, 2024
1 parent 4e41541 commit f4fdd8f
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 48 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,19 @@ build-*/
python/build/
python/dist/
python/triton*.egg-info/
python/*.whl

python/triton/_C/*.pyd
python/triton/_C/*.so
python/triton/_C/*.dylib

benchmarks/dist
benchmarks/*.egg-info/
benchmarks/**/*.so

# Logs
inductor_log/

# Backends copied from submodules
python/triton/backends/
!python/triton/backends/__init__.py
Expand Down
4 changes: 3 additions & 1 deletion benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ if(NOT WIN32)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
endif()

find_package(Python3 COMPONENTS Interpreter)
find_package(Python3 REQUIRED
COMPONENTS Development.Module)
find_package(Torch REQUIRED)
# Locate the torch_python library inside the Torch installation's lib directory.
# `PATHS` is the find_library() keyword for additional search directories; the
# former `PATH` is not a documented keyword, so it was presumably taken as an
# extra (bogus) search path by the short-hand signature.
find_library(TORCH_PYTHON_LIBRARY torch_python PATHS "${TORCH_INSTALL_PREFIX}/lib")
find_package(XeTLALibrary REQUIRED)

if(USE_IPEX)
string(APPEND CMAKE_CXX_FLAGS " -DUSE_IPEX")
Expand Down
4 changes: 3 additions & 1 deletion benchmarks/cmake/FindXeTLALibrary.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
include(FetchContent)

if (NOT XeTLALibrary_FOUND)
# TODO: switch to FetchContent_MakeAvailable once XeTLA supports it
cmake_policy(SET CMP0169 OLD)

set(XeTLALibrary_SOURCE_DIR
"${CMAKE_CURRENT_BINARY_DIR}/XeTLALibrary")
message(STATUS "XeTLALibrary is not specified. Will try to download
XeTLA library from https://github.com/intel/xetla into
${XeTLALibrary_SOURCE_DIR}")
file(READ xetla-library.conf XeTLALibrary_TAG)
file(READ xetla_kernel/xetla-library.conf XeTLALibrary_TAG)
# Strip the potential trailing newline from tag
string(STRIP "${XeTLALibrary_TAG}" XeTLALibrary_TAG)
FetchContent_Declare(xetla-library
Expand Down
130 changes: 91 additions & 39 deletions benchmarks/setup.py
Original file line number Diff line number Diff line change
@@ -1,83 +1,135 @@
import os
import re
import shutil
import subprocess
import sysconfig
import sys

from setuptools import setup
# TODO: update once there is replacement for clean:
# https://github.com/pypa/setuptools/discussions/2838
from distutils import log # pylint: disable=[deprecated-module]
from distutils.dir_util import remove_tree # pylint: disable=[deprecated-module]
from distutils.command.clean import clean as _clean # pylint: disable=[deprecated-module]

from setuptools import setup, Extension
from setuptools.command.build_ext import build_ext as _build_ext

import torch

ipex_cmake_prefix_path = ""
USE_IPEX_OPTION = os.getenv("USE_IPEX", "1")
if USE_IPEX_OPTION == "1":
import intel_extension_for_pytorch
ipex_cmake_prefix_path = f";{intel_extension_for_pytorch.cmake_prefix_path}"

# Placeholder setuptools Extension that carries only a name and an empty
# source list, so the stock build_ext has nothing to compile for it.
class CMakeExtension(Extension):

# name: extension name forwarded unchanged to Extension.__init__.
def __init__(self, name):
# don't invoke the original build_ext for this special extension
super().__init__(name, sources=[])


# Helper that holds the paths and options used to configure, build, install
# and clean the CMake project located next to this file.
class CMakeBuild():

# NOTE(review): two __init__ signatures appear back to back in this listing;
# presumably the first is the pre-change line of the diff -- confirm.
def __init__(self):
# debug:   passed to get_build_type() to select the CMake build type.
# dry_run: stored on the instance; check_call() and clean() consult it.
def __init__(self, debug=False, dry_run=False):
# Directory containing this setup script; used as the CMake source dir.
self.current_dir = os.path.abspath(os.path.dirname(__file__))
# CMake binary (build) directory.
self.build_temp = self.current_dir + "/build/temp"
# Python package directory; passed to CMake as output/install location.
self.extdir = self.current_dir + "/triton_kernels_benchmark"
self.build_type = self.get_build_type(debug)
# Starts with torch's prefix path; check_ipex() may append another entry.
self.cmake_prefix_paths = [torch.utils.cmake_prefix_path]
# Stays False until check_ipex() decides otherwise.
self.use_ipex = False
self.dry_run = dry_run

# Return the CMake build type string: "Debug" when `debug` is truthy or the
# DEBUG environment variable equals "1", otherwise "Release".
def get_build_type(self, debug):
# DEBUG defaults to "0" when the variable is not set.
DEBUG_OPTION = os.getenv("DEBUG", "0")
return "Debug" if debug or (DEBUG_OPTION == "1") else "Release"

# Entry point of the CMake-driven build.
# NOTE(review): this listing appears to interleave pre- and post-change lines
# of a diff (a `cmake --version` probe next to the check_ipex()/
# build_extension() calls); confirm which statements are live.
def run(self):
# Probe for the cmake executable; an OSError is re-raised as RuntimeError.
try:
out = subprocess.check_output(["cmake", "--version"])
except OSError as error:
raise RuntimeError("CMake must be installed") from error
self.check_ipex()
self.build_extension()

# Parse "version <major>.<minor>" from the probe output and require >= 3.18.
match = re.search(r"version\s*(?P<major>\d+)\.(?P<minor>\d+)([\d.]+)?", out.decode())
cmake_major, cmake_minor = int(match.group("major")), int(match.group("minor"))
if (cmake_major, cmake_minor) < (3, 18):
raise RuntimeError("CMake >= 3.18.0 is required")
# Decide whether the build uses intel_extension_for_pytorch (IPEX) and record
# the result in self.use_ipex.
def check_ipex(self):
# USE_IPEX defaults to "1"; any other value disables IPEX outright.
self.use_ipex = os.getenv("USE_IPEX", "1") == "1"
if not self.use_ipex:
return
# IPEX was requested: fall back to a non-IPEX build with a warning when the
# package cannot be imported.
try:
import intel_extension_for_pytorch
except ImportError:
log.warn("ipex is not installed trying to build without ipex")
self.use_ipex = False
return
# Add IPEX's prefix path to the list later joined into CMAKE_PREFIX_PATH.
self.cmake_prefix_paths.append(intel_extension_for_pytorch.cmake_prefix_path)

self.build_extension()
# Log a command and run it through subprocess.check_call unless dry_run is set.
# popenargs[0] must be a sequence of strings (it is joined with spaces for the
# log line); all positional and keyword arguments are forwarded unchanged.
def check_call(self, *popenargs, **kwargs):
log.info(" ".join(popenargs[0]))
# In dry-run mode the command is only logged, never executed.
if not self.dry_run:
subprocess.check_call(*popenargs, **kwargs)

# Configure, build and install the CMake project by invoking `cmake`.
# NOTE(review): this listing appears to interleave pre- and post-change lines
# of a diff (e.g. two CMAKE_PREFIX_PATH / USE_IPEX entries, two build_args
# assignments, and both subprocess.check_call and self.check_call
# invocations); confirm which statements are live before relying on it.
def build_extension(self):
# shutil.which returns None when ninja is not on PATH.
ninja_dir = shutil.which("ninja")
# create build directories
if not os.path.exists(self.build_temp):
os.makedirs(self.build_temp)
# python directories
python_include_dir = sysconfig.get_path("platinclude")
# Arguments for the CMake configure step.
cmake_args = [
"-G",
"Ninja", # Ninja is much faster than make
"-DCMAKE_MAKE_PROGRAM=" +
ninja_dir, # Pass explicit path to ninja otherwise cmake may cache a temporary path
f"-DCMAKE_PREFIX_PATH={torch.utils.cmake_prefix_path}{ipex_cmake_prefix_path}",
f"-DUSE_IPEX={USE_IPEX_OPTION}",
"-DCMAKE_EXPORT_COMPILE_COMMANDS=ON",
"-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=" + self.extdir,
"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + self.extdir,
"-DPython3_EXECUTABLE:FILEPATH=" + sys.executable,
"-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON",
"-DPYTHON_INCLUDE_DIRS=" + python_include_dir,
"-DCMAKE_PREFIX_PATH=" + ";".join(self.cmake_prefix_paths),
f"-DUSE_IPEX={int(self.use_ipex)}",
"-DCMAKE_INSTALL_PREFIX=" + self.extdir,
"-DPython3_ROOT_DIR:FILEPATH=" + sys.exec_prefix,
"-DCMAKE_VERBOSE_MAKEFILE=TRUE",
"-DCMAKE_C_COMPILER=icx",
"-DCMAKE_CXX_COMPILER=icpx",
"-DCMAKE_BUILD_TYPE=" + self.build_type,
"-S",
self.current_dir,
"-B",
self.build_temp,
]

# configuration
build_type = "Debug"
build_args = ["--config", build_type]
cmake_args += ["-DCMAKE_BUILD_TYPE=" + build_type]
# Parallelism: MAX_JOBS from the environment, else twice the CPU count.
max_jobs = os.getenv("MAX_JOBS", str(2 * os.cpu_count()))
build_args += ["-j" + max_jobs]
# Arguments for `cmake --build <build_temp>`.
build_args = [
"--build",
self.build_temp,
"-j" + max_jobs,
]

# Arguments for building the `install` target in the same build directory.
install_args = [
"--build",
self.build_temp,
"--target",
"install",
]

env = os.environ.copy()
cmake_dir = self.build_temp
subprocess.check_call(["cmake", self.current_dir] + cmake_args, cwd=cmake_dir, env=env)
subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=cmake_dir)
# Configure, then build, then install; each goes through self.check_call.
self.check_call(["cmake"] + cmake_args, env=env)
self.check_call(["cmake"] + build_args)
self.check_call(["cmake"] + install_args)

# Remove the CMake build directory (self.build_temp) if it exists; otherwise
# log a warning. self.dry_run is forwarded to remove_tree.
def clean(self):
if os.path.exists(self.build_temp):
remove_tree(self.build_temp, dry_run=self.dry_run)
else:
# The warning shows the path relative to this script's directory.
log.warn("'%s' does not exist -- can't clean it", os.path.relpath(self.build_temp,
os.path.dirname(__file__)))


# build_ext command override: runs the CMake build first, then the stock
# setuptools build_ext.
class build_ext(_build_ext):

def run(self):
# The command's own debug and dry_run options are passed on to CMakeBuild.
cmake = CMakeBuild(debug=self.debug, dry_run=self.dry_run)
cmake.run()
super().run()


# clean command override: removes the CMake build directory via
# CMakeBuild.clean(), then runs the stock distutils clean.
class clean(_clean):

def run(self):
# Only dry_run is passed on; CMakeBuild.clean() does not invoke cmake.
cmake = CMakeBuild(dry_run=self.dry_run)
cmake.clean()
super().run()

# NOTE(review): this unconditional CMakeBuild().run() at import time is
# presumably the pre-change side of the diff (the custom build_ext command
# defined in this file also runs the CMake build); confirm.
cmake = CMakeBuild()
cmake.run()

# Package definition. NOTE(review): two `package_data` lines appear back to
# back below; presumably the first is the pre-change line -- confirm.
setup(name="triton-kernels-benchmark", packages=[
"triton_kernels_benchmark",
], package_dir={
"triton_kernels_benchmark": "triton_kernels_benchmark",
}, package_data={"triton_kernels_benchmark": ["xetla_kernel.so"]})
}, package_data={"triton_kernels_benchmark": ["xetla_kernel.cpython-*.so"]}, cmdclass={
"build_ext": build_ext,
"clean": clean,
}, ext_modules=[CMakeExtension("triton_kernels_benchmark")])
10 changes: 3 additions & 7 deletions benchmarks/xetla_kernel/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# XeTLA library is required.
find_package(XeTLALibrary REQUIRED)
set(CMAKE_CXX_STANDARD 20)

set(XETLA_KERNEL_FLAGS ${XETLA_KERNEL_FLAGS}
-fsycl
-fsycl-device-code-split=per_kernel
Expand Down Expand Up @@ -29,8 +25,7 @@ else()
set(XETLA_KERNEL_FLAGS ${XETLA_KERNEL_FLAGS} "${XETLA_OFFLINE_OPTIONS}")
endif()

add_library(xetla_kernel SHARED python_main.cpp)
set_target_properties(xetla_kernel PROPERTIES PREFIX "")
Python3_add_library(xetla_kernel MODULE WITH_SOABI python_main.cpp)
target_compile_options(xetla_kernel PRIVATE "-fPIC")
if(USE_IPEX)
target_compile_options(xetla_kernel PRIVATE "-fsycl")
Expand All @@ -40,7 +35,6 @@ endif()
target_compile_options(xetla_kernel PUBLIC "-DXETPP_NEW_XMAIN")
target_link_options(xetla_kernel PRIVATE ${XETLA_KERNEL_FLAGS})
target_link_libraries(xetla_kernel PUBLIC ${TORCH_LIBRARIES} ${TORCH_PYTHON_LIBRARY})
target_include_directories(xetla_kernel PUBLIC "${PYTHON_INCLUDE_DIRS}")
target_include_directories(xetla_kernel PUBLIC "${XeTLALibrary_INCLUDE_DIR}")

if(USE_IPEX)
Expand All @@ -52,3 +46,5 @@ add_subdirectory(softmax)
add_subdirectory(gemm)
add_subdirectory(stream_k_gemm)
add_subdirectory(flash_attention)

install(TARGETS xetla_kernel LIBRARY DESTINATION .)
2 changes: 2 additions & 0 deletions benchmarks/xetla_kernel/flash_attention/fmha_forward_v5.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#ifndef TRITONBENCHMARK_FMHA_FWD_V5_H
#define TRITONBENCHMARK_FMHA_FWD_V5_H

#include <cmath>

#include "fmha_policy_v2.h"
#include "fmha_utils.h"
#include "xetla.hpp"
Expand Down

0 comments on commit f4fdd8f

Please sign in to comment.