diff --git a/.github/workflows/benchmarks-reusable.yml b/.github/workflows/benchmarks-reusable.yml
index 83c05f896c..f7c660f3bd 100644
--- a/.github/workflows/benchmarks-reusable.yml
+++ b/.github/workflows/benchmarks-reusable.yml
@@ -34,6 +34,10 @@ on:
         required: false
         type: boolean
         default: false
+      compute_runtime_commit:
+        required: false
+        type: string
+        default: ''
 
 permissions:
   contents: read
@@ -200,6 +204,7 @@ jobs:
          --ur ${{ github.workspace }}/ur_install
          --umf ${{ github.workspace }}/umf_build
          --adapter ${{ matrix.adapter.str_name }}
+          --compute-runtime ${{ inputs.compute_runtime_commit }}
          ${{ inputs.upload_report && '--output-html' || '' }}
          ${{ inputs.bench_script_params }}
 
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index af62d40e85..7de3926daf 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -43,6 +43,11 @@ on:
         type: string
         required: false
         default: ''
+      compute_runtime_commit:
+        description: 'Compute Runtime commit'
+        type: string
+        required: false
+        default: ''
       upload_report:
         description: 'Upload HTML report'
         type: boolean
@@ -65,4 +70,5 @@ jobs:
       sycl_config_params: ${{ inputs.sycl_config_params }}
       sycl_repo: ${{ inputs.sycl_repo }}
       sycl_commit: ${{ inputs.sycl_commit }}
+      compute_runtime_commit: ${{ inputs.compute_runtime_commit }}
       upload_report: ${{ inputs.upload_report }}
diff --git a/scripts/benchmarks/benches/base.py b/scripts/benchmarks/benches/base.py
index 38bbedd25a..3d5a02fa92 100644
--- a/scripts/benchmarks/benches/base.py
+++ b/scripts/benchmarks/benches/base.py
@@ -7,7 +7,7 @@
 import shutil
 from pathlib import Path
 from .result import Result
-from .options import options
+from options import options
 from utils.utils import download, run
 import urllib.request
 import tarfile
@@ -28,17 +28,22 @@ def get_adapter_full_path():
             f"could not find adapter file {adapter_path} (and in similar lib paths)"
 
     def run_bench(self, command, env_vars, ld_library=[], add_sycl=True):
-        env_vars_with_forced_adapter = env_vars.copy()
+        env_vars = env_vars.copy()
         if options.ur is not None:
-            env_vars_with_forced_adapter.update(
+            env_vars.update(
                 {'UR_ADAPTERS_FORCE_LOAD': Benchmark.get_adapter_full_path()})
 
+        env_vars.update(options.extra_env_vars)
+
+        ld_libraries = options.extra_ld_libraries.copy()
+        ld_libraries.extend(ld_library)
+
         return run(
             command=command,
-            env_vars=env_vars_with_forced_adapter,
+            env_vars=env_vars,
             add_sycl=add_sycl,
             cwd=options.benchmark_cwd,
-            ld_library=ld_library
+            ld_library=ld_libraries
         ).stdout.decode()
 
     def create_data_path(self, name, skip_data_dir = False):
diff --git a/scripts/benchmarks/benches/compute.py b/scripts/benchmarks/benches/compute.py
index be48acce36..a379ad3ab1 100644
--- a/scripts/benchmarks/benches/compute.py
+++ b/scripts/benchmarks/benches/compute.py
@@ -9,7 +9,7 @@
 from utils.utils import run, git_clone, create_build_path
 from .base import Benchmark, Suite
 from .result import Result
-from .options import options
+from options import options
 
 class ComputeBench(Suite):
     def __init__(self, directory):
@@ -22,7 +22,7 @@ def setup(self):
         if options.sycl is None:
             return
 
-        repo_path = git_clone(self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", "df38bc342641d7e83fbb4fe764a23d21d734e07b")
+        repo_path = git_clone(self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", "d13e5b4d8dd3d28926a74ab7f67f78c10f708a01")
         build_path = create_build_path(self.directory, 'compute-benchmarks-build')
 
         configure_command = [
diff --git a/scripts/benchmarks/benches/llamacpp.py b/scripts/benchmarks/benches/llamacpp.py
index 8d01e2832d..5efe9c83c3 100644
--- a/scripts/benchmarks/benches/llamacpp.py
+++ b/scripts/benchmarks/benches/llamacpp.py
@@ -10,7 +10,7 @@
 from .base import Benchmark, Suite
 from .result import Result
 from utils.utils import run, create_build_path
-from .options import options
+from options import options
 from .oneapi import get_oneapi
 import os
 
diff --git a/scripts/benchmarks/benches/oneapi.py b/scripts/benchmarks/benches/oneapi.py
index 414c4aa64a..9018f109f1 100644
--- a/scripts/benchmarks/benches/oneapi.py
+++ b/scripts/benchmarks/benches/oneapi.py
@@ -5,7 +5,7 @@
 
 from pathlib import Path
 from utils.utils import download, run
-from .options import options
+from options import options
 import os
 
 class OneAPI:
diff --git a/scripts/benchmarks/benches/syclbench.py b/scripts/benchmarks/benches/syclbench.py
index 1e358eb071..172ba18f1b 100644
--- a/scripts/benchmarks/benches/syclbench.py
+++ b/scripts/benchmarks/benches/syclbench.py
@@ -9,7 +9,7 @@
 from utils.utils import run, git_clone, create_build_path
 from .base import Benchmark, Suite
 from .result import Result
-from .options import options
+from options import options
 
 class SyclBench(Suite):
     def __init__(self, directory):
diff --git a/scripts/benchmarks/benches/test.py b/scripts/benchmarks/benches/test.py
index efe789f678..7c93b62d04 100644
--- a/scripts/benchmarks/benches/test.py
+++ b/scripts/benchmarks/benches/test.py
@@ -8,7 +8,7 @@
 from .base import Benchmark, Suite
 from .result import Result
 from utils.utils import run, create_build_path
-from .options import options
+from options import options
 import os
 
 class TestSuite(Suite):
diff --git a/scripts/benchmarks/benches/umf.py b/scripts/benchmarks/benches/umf.py
index 14137ca437..c3b963fab7 100644
--- a/scripts/benchmarks/benches/umf.py
+++ b/scripts/benchmarks/benches/umf.py
@@ -8,7 +8,7 @@
 from .base import Benchmark, Suite
 from .result import Result
 from utils.utils import run, create_build_path
-from .options import options
+from options import options
 from .oneapi import get_oneapi
 import os
 import csv
diff --git a/scripts/benchmarks/benches/velocity.py b/scripts/benchmarks/benches/velocity.py
index d22243ebeb..5392b44936 100644
--- a/scripts/benchmarks/benches/velocity.py
+++ b/scripts/benchmarks/benches/velocity.py
@@ -9,7 +9,7 @@
 from .base import Benchmark, Suite
 from .result import Result
 from utils.utils import run, create_build_path
-from .options import options
+from options import options
 from .oneapi import get_oneapi
 
 import shutil
@@ -54,7 +54,6 @@ def __init__(self, name: str, bin_name: str, vb: VelocityBench, unit: str):
         self.bench_name = name
         self.bin_name = bin_name
         self.unit = unit
-        self.code_path = os.path.join(self.vb.repo_path, self.bench_name, 'SYCL')
 
     def download_deps(self):
         return
@@ -66,6 +65,7 @@ def ld_libraries(self) -> list[str]:
         return []
 
     def setup(self):
+        self.code_path = os.path.join(self.vb.repo_path, self.bench_name, 'SYCL')
         self.download_deps()
         self.benchmark_bin = os.path.join(self.directory, self.bench_name, self.bin_name)
 
@@ -130,12 +130,13 @@ def parse_output(self, stdout: str) -> float:
 class Bitcracker(VelocityBase):
     def __init__(self, vb: VelocityBench):
         super().__init__("bitcracker", "bitcracker", vb, "s")
-        self.data_path = os.path.join(vb.repo_path, "bitcracker", "hash_pass")
 
     def name(self):
         return "Velocity-Bench Bitcracker"
 
     def bin_args(self) -> list[str]:
+        self.data_path = os.path.join(self.vb.repo_path, "bitcracker", "hash_pass")
+
         return ["-f", f"{self.data_path}/img_win8_user_hash.txt",
                 "-d", f"{self.data_path}/user_passwords_60000.txt",
                 "-b", "60000"]
@@ -175,7 +176,6 @@ def parse_output(self, stdout: str) -> float:
 class QuickSilver(VelocityBase):
     def __init__(self, vb: VelocityBench):
         super().__init__("QuickSilver", "qs", vb, "MMS/CTT")
-        self.data_path = os.path.join(vb.repo_path, "QuickSilver", "Examples", "AllScattering")
 
     def run(self, env_vars) -> list[Result]:
         # TODO: fix the crash in QuickSilver when UR_L0_USE_IMMEDIATE_COMMANDLISTS=0
@@ -191,6 +191,8 @@ def lower_is_better(self):
         return False
 
     def bin_args(self) -> list[str]:
+        self.data_path = os.path.join(self.vb.repo_path, "QuickSilver", "Examples", "AllScattering")
+
         return ["-i", f"{self.data_path}/scatteringOnly.inp"]
 
     def extra_env_vars(self) -> dict:
@@ -266,11 +268,10 @@ def parse_output(self, stdout: str) -> float:
 
 class DLCifar(VelocityBase):
     def __init__(self, vb: VelocityBench):
-        self.oneapi = get_oneapi()
         super().__init__("dl-cifar", "dl-cifar_sycl", vb, "s")
 
     def ld_libraries(self):
-        return self.oneapi.ld_libraries()
+        return get_oneapi().ld_libraries()
 
     def download_deps(self):
         # TODO: dl-cifar hardcodes the path to this dataset as "../../datasets/cifar-10-binary"...
@@ -278,8 +279,9 @@ def download_deps(self):
         return
 
     def extra_cmake_args(self):
+        oneapi = get_oneapi()
         return [
-            f"-DCMAKE_CXX_FLAGS=-O3 -fsycl -ffast-math -I{self.oneapi.dnn_include()} -I{self.oneapi.mkl_include()} -L{self.oneapi.dnn_lib()} -L{self.oneapi.mkl_lib()}"
+            f"-DCMAKE_CXX_FLAGS=-O3 -fsycl -ffast-math -I{oneapi.dnn_include()} -I{oneapi.mkl_include()} -L{oneapi.dnn_lib()} -L{oneapi.mkl_lib()}"
         ]
 
     def name(self):
@@ -294,11 +296,10 @@ def parse_output(self, stdout: str) -> float:
 
 class DLMnist(VelocityBase):
     def __init__(self, vb: VelocityBench):
-        self.oneapi = get_oneapi()
         super().__init__("dl-mnist", "dl-mnist-sycl", vb, "s")
 
     def ld_libraries(self):
-        return self.oneapi.ld_libraries()
+        return get_oneapi().ld_libraries()
 
     def download_deps(self):
         # TODO: dl-mnist hardcodes the path to this dataset as "../../datasets/"...
@@ -308,8 +309,9 @@ def download_deps(self):
         self.download("datasets", "https://raw.githubusercontent.com/fgnt/mnist/master/t10k-labels-idx1-ubyte.gz", "t10k-labels.idx1-ubyte.gz", unzip=True, skip_data_dir=True)
 
     def extra_cmake_args(self):
+        oneapi = get_oneapi()
         return [
-            f"-DCMAKE_CXX_FLAGS=-O3 -fsycl -ffast-math -I{self.oneapi.dnn_include()} -I{self.oneapi.mkl_include()} -L{self.oneapi.dnn_lib()} -L{self.oneapi.mkl_lib()}"
+            f"-DCMAKE_CXX_FLAGS=-O3 -fsycl -ffast-math -I{oneapi.dnn_include()} -I{oneapi.mkl_include()} -L{oneapi.dnn_lib()} -L{oneapi.mkl_lib()}"
         ]
 
     def name(self):
@@ -337,15 +339,15 @@ def parse_output(self, stdout: str) -> float:
 
 class SVM(VelocityBase):
     def __init__(self, vb: VelocityBench):
-        self.oneapi = get_oneapi()
         super().__init__("svm", "svm_sycl", vb, "s")
 
     def ld_libraries(self):
-        return self.oneapi.ld_libraries()
+        return get_oneapi().ld_libraries()
 
     def extra_cmake_args(self):
+        oneapi = get_oneapi()
         return [
-            f"-DCMAKE_CXX_FLAGS=-O3 -fsycl -ffast-math -I{self.oneapi.dnn_include()} -I{self.oneapi.mkl_include()} -L{self.oneapi.dnn_lib()} -L{self.oneapi.mkl_lib()}"
+            f"-DCMAKE_CXX_FLAGS=-O3 -fsycl -ffast-math -I{oneapi.dnn_include()} -I{oneapi.mkl_include()} -L{oneapi.dnn_lib()} -L{oneapi.mkl_lib()}"
         ]
 
     def name(self):
diff --git a/scripts/benchmarks/history.py b/scripts/benchmarks/history.py
index 5b83ef9479..82490995b1 100644
--- a/scripts/benchmarks/history.py
+++ b/scripts/benchmarks/history.py
@@ -7,7 +7,7 @@
 import json
 from pathlib import Path
 from benches.result import Result, BenchmarkRun
-from benches.options import Compare, options
+from options import Compare, options
 from datetime import datetime, timezone
 from utils.utils import run;
 
diff --git a/scripts/benchmarks/main.py b/scripts/benchmarks/main.py
index 7c1f9ce3a1..6a94367f34 100755
--- a/scripts/benchmarks/main.py
+++ b/scripts/benchmarks/main.py
@@ -11,11 +11,12 @@
 from benches.llamacpp import *
 from benches.umf import *
 from benches.test import TestSuite
-from benches.options import Compare, options
+from options import Compare, options
 from output_markdown import generate_markdown
 from output_html import generate_html
 from history import BenchmarkHistory
-from utils.utils import prepare_workdir;
+from utils.utils import prepare_workdir
+from utils.compute_runtime import *
 import argparse
 import re
 
@@ -117,6 +118,13 @@ def process_results(results: dict[str, list[Result]], stddev_threshold_override)
 
 def main(directory, additional_env_vars, save_name, compare_names, filter):
     prepare_workdir(directory, INTERNAL_WORKDIR_VERSION)
+    if options.build_compute_runtime:
+        print(f"Setting up Compute Runtime {options.compute_runtime_tag}")
+        cr = get_compute_runtime()
+        print("Compute Runtime setup complete.")
+        options.extra_ld_libraries.extend(cr.ld_libraries())
+        options.extra_env_vars.update(cr.env_vars())
+
     suites = [
         ComputeBench(directory),
         VelocityBench(directory),
@@ -129,15 +137,15 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     benchmarks = []
 
     for s in suites:
-        print(f"Setting up {type(s).__name__}")
-        s.setup()
-        print(f"{type(s).__name__} setup complete.")
-
-    for s in suites:
-        benchmarks += s.benchmarks()
+        suite_benchmarks = s.benchmarks()
+        if filter:
+            suite_benchmarks = [benchmark for benchmark in suite_benchmarks if filter.search(benchmark.name())]
 
-    if filter:
-        benchmarks = [benchmark for benchmark in benchmarks if filter.search(benchmark.name())]
+        if suite_benchmarks:
+            print(f"Setting up {type(s).__name__}")
+            s.setup()
+            print(f"{type(s).__name__} setup complete.")
setup complete.") + benchmarks += suite_benchmarks for b in benchmarks: print(b.name()) @@ -241,7 +249,7 @@ def validate_and_parse_env_args(env_args): parser.add_argument("--save", type=str, help='Save the results for comparison under a specified name.') parser.add_argument("--compare", type=str, help='Compare results against previously saved data.', action="append", default=["baseline"]) parser.add_argument("--iterations", type=int, help='Number of times to run each benchmark to select a median value.', default=options.iterations) - parser.add_argument("--stddev-threshold", type=float, help='If stddev % is above this threshold, rerun all iterations', default=options.stddev_threshold) + parser.add_argument("--stddev-threshold", type=float, help='If stddev pct is above this threshold, rerun all iterations', default=options.stddev_threshold) parser.add_argument("--timeout", type=int, help='Timeout for individual benchmarks in seconds.', default=options.timeout) parser.add_argument("--filter", type=str, help='Regex pattern to filter benchmarks by name.', default=None) parser.add_argument("--epsilon", type=float, help='Threshold to consider change of performance significant', default=options.epsilon) @@ -252,12 +260,8 @@ def validate_and_parse_env_args(env_args): parser.add_argument("--output-html", help='Create HTML output', action="store_true", default=False) parser.add_argument("--output-markdown", help='Create Markdown output', action="store_true", default=True) parser.add_argument("--dry-run", help='Do not run any actual benchmarks', action="store_true", default=False) - parser.add_argument( - "--iterations-stddev", - type=int, - help="Max number of iterations of the loop calculating stddev after completed benchmark runs", - default=options.iterations_stddev, - ) + parser.add_argument("--compute-runtime", nargs='?', const=options.compute_runtime_tag, help="Fetch and build compute runtime") + parser.add_argument("--iterations-stddev", type=int, help="Max number of iterations of the loop calculating stddev after completed benchmark runs") args = parser.parse_args() additional_env_vars = validate_and_parse_env_args(args.env) @@ -279,6 +283,9 @@ def validate_and_parse_env_args(env_args): options.dry_run = args.dry_run options.umf = args.umf options.iterations_stddev = args.iterations_stddev + if args.compute_runtime is not None: + options.build_compute_runtime = True + options.compute_runtime_tag = args.compute_runtime benchmark_filter = re.compile(args.filter) if args.filter else None diff --git a/scripts/benchmarks/benches/options.py b/scripts/benchmarks/options.py similarity index 73% rename from scripts/benchmarks/benches/options.py rename to scripts/benchmarks/options.py index 5ef4a022aa..39f34a73e6 100644 --- a/scripts/benchmarks/benches/options.py +++ b/scripts/benchmarks/options.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum class Compare(Enum): @@ -27,6 +27,10 @@ class Options: stddev_threshold: float = 0.02 epsilon: float = 0.02 iterations_stddev: int = 5 + build_compute_runtime: bool = False + extra_ld_libraries: list[str] = field(default_factory=list) + extra_env_vars: dict = field(default_factory=dict) + compute_runtime_tag: str = 'c1ed0334d65f6ce86d7273fe4137d1d4a5b5fa7c' options = Options() diff --git a/scripts/benchmarks/output_markdown.py b/scripts/benchmarks/output_markdown.py index 177869f8f0..13df68d45e 100644 --- a/scripts/benchmarks/output_markdown.py +++ b/scripts/benchmarks/output_markdown.py @@ 
-5,7 +5,7 @@ import collections, re from benches.result import Result -from benches.options import options +from options import options import math class OutputLine: diff --git a/scripts/benchmarks/utils/compute_runtime.py b/scripts/benchmarks/utils/compute_runtime.py new file mode 100644 index 0000000000..929bc85164 --- /dev/null +++ b/scripts/benchmarks/utils/compute_runtime.py @@ -0,0 +1,107 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import os +import re + +from pathlib import Path +from .utils import * +from options import options + +def replace_in_file(file_path, search_pattern, replacement): + with open(file_path, 'r') as file: + content = file.read() + + modified_content = re.sub(search_pattern, replacement, content) + + with open(file_path, 'w') as file: + file.write(modified_content) + +class ComputeRuntime: + def __init__(self): + self.gmmlib = self.build_gmmlib() + self.level_zero = self.build_level_zero() + self.compute_runtime = self.build_compute_runtime(self.gmmlib, self.level_zero) + + return + + def ld_libraries(self) -> list[str]: + return [ + os.path.join(self.gmmlib, "lib64"), + os.path.join(self.level_zero, "lib64"), + os.path.join(self.compute_runtime, "bin"), + ] + + def env_vars(self) -> dict: + return {"ZE_ENABLE_ALT_DRIVERS" : os.path.join(self.compute_runtime, "bin", "libze_intel_gpu.so")} + + def build_gmmlib(self): + self.gmmlib_repo = git_clone(options.workdir, "gmmlib-repo", "https://github.com/intel/gmmlib.git", "9104c2090158b35d440afdf8ec940d89cc7b3c6a") + self.gmmlib_build = os.path.join(options.workdir, "gmmlib-build") + self.gmmlib_install = os.path.join(options.workdir, "gmmlib-install") + configure_command = [ + "cmake", + f"-B {self.gmmlib_build}", + f"-S {self.gmmlib_repo}", + f"-DCMAKE_INSTALL_PREFIX={self.gmmlib_install}", + f"-DCMAKE_BUILD_TYPE=Release", + ] + run(configure_command) + run(f"cmake --build {self.gmmlib_build} -j") + run(f"cmake --install {self.gmmlib_build}") + return self.gmmlib_install + + def build_level_zero(self): + self.level_zero_repo = git_clone(options.workdir, "level-zero-repo", "https://github.com/oneapi-src/level-zero.git", "3969f34c16a843b943b948f8fe7081ef87deb369") + self.level_zero_build = os.path.join(options.workdir, "level-zero-build") + self.level_zero_install = os.path.join(options.workdir, "level-zero-install") + + cmakelists_path = os.path.join(self.level_zero_repo, "CMakeLists.txt") + # there's a bug in level-zero CMakeLists.txt that makes it install headers into incorrect location. 
+ replace_in_file(cmakelists_path, r'DESTINATION \./include/', 'DESTINATION include/') + + configure_command = [ + "cmake", + f"-B {self.level_zero_build}", + f"-S {self.level_zero_repo}", + f"-DCMAKE_INSTALL_PREFIX={self.level_zero_install}", + f"-DCMAKE_BUILD_TYPE=Release", + ] + run(configure_command) + run(f"cmake --build {self.level_zero_build} -j") + run(f"cmake --install {self.level_zero_build}") + return self.level_zero_install + + def build_compute_runtime(self, gmmlib, level_zero): + self.compute_runtime_repo = git_clone(options.workdir, "compute-runtime-repo", "https://github.com/intel/compute-runtime.git", options.compute_runtime_tag) + self.compute_runtime_build = os.path.join(options.workdir, "compute-runtime-build") + + cmakelists_path = os.path.join(self.compute_runtime_repo, "level_zero", "cmake", "FindLevelZero.cmake") + # specifying custom L0 is problematic... + replace_in_file(cmakelists_path, r'(\$\{LEVEL_ZERO_ROOT\}\s*)', r'\1NO_DEFAULT_PATH\n') + + cmakelists_path = os.path.join(self.compute_runtime_repo, "CMakeLists.txt") + # Remove -Werror... + replace_in_file(cmakelists_path, r'\s-Werror(?:=[a-zA-Z]*)?', '') + + configure_command = [ + "cmake", + f"-B {self.compute_runtime_build}", + f"-S {self.compute_runtime_repo}", + "-DCMAKE_BUILD_TYPE=Release", + "-DNEO_ENABLE_i915_PRELIM_DETECTION=1", + "-DNEO_ENABLE_I915_PRELIM_DETECTION=1", + "-DNEO_SKIP_UNIT_TESTS=1", + f"-DGMM_DIR={gmmlib}", + f"-DLEVEL_ZERO_ROOT={level_zero}" + ] + run(configure_command) + run(f"cmake --build {self.compute_runtime_build} -j") + return self.compute_runtime_build + +def get_compute_runtime() -> ComputeRuntime: # ComputeRuntime singleton + if not hasattr(get_compute_runtime, "instance"): + get_compute_runtime.instance = ComputeRuntime() + return get_compute_runtime.instance diff --git a/scripts/benchmarks/utils/utils.py b/scripts/benchmarks/utils/utils.py index 0bb954fab2..08d26bd708 100644 --- a/scripts/benchmarks/utils/utils.py +++ b/scripts/benchmarks/utils/utils.py @@ -10,7 +10,7 @@ import tarfile import urllib # nosec B404 -from benches.options import options +from options import options from pathlib import Path def run(command, env_vars={}, cwd=None, add_sycl=False, ld_library=[]):
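
Taken together, the patch adds an opt-in path for benchmarking against a locally built compute-runtime: passing --compute-runtime (forwarded by the workflows as inputs.compute_runtime_commit) builds gmmlib, Level Zero, and compute-runtime once, and every benchmark then runs against the resulting driver. The sketch below condenses that flow from main.py, options.py, and benches/base.py above; it is illustrative only, the helper function name is hypothetical (main() inlines the same logic), and "..." stands for the script's other, unchanged arguments.

    # Condensed illustration of the new flow; names are taken from the diff above.
    #
    #   main.py ... --compute-runtime                   -> bare flag: nargs='?' falls back to const,
    #                                                      i.e. the pinned options.compute_runtime_tag
    #   main.py ... --compute-runtime <tag-or-commit>   -> builds that revision instead
    #
    from options import options
    from utils.compute_runtime import get_compute_runtime

    def setup_local_compute_runtime():
        # Hypothetical helper: only does work when --compute-runtime was given,
        # which is when main.py sets options.build_compute_runtime.
        if not options.build_compute_runtime:
            return
        # Singleton: clones and builds gmmlib, Level Zero, and compute-runtime once per run.
        cr = get_compute_runtime()
        # LD paths for the freshly built libraries, plus ZE_ENABLE_ALT_DRIVERS pointing at
        # the locally built libze_intel_gpu.so.
        options.extra_ld_libraries.extend(cr.ld_libraries())
        options.extra_env_vars.update(cr.env_vars())

    # benches/base.py then merges options.extra_env_vars and options.extra_ld_libraries into
    # every run_bench() call, so each benchmark child process picks up the local driver.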