diff --git a/.github/workflows/benchmarks-reusable.yml b/.github/workflows/benchmarks-reusable.yml
index 79cb35748e..6c00fbb04d 100644
--- a/.github/workflows/benchmarks-reusable.yml
+++ b/.github/workflows/benchmarks-reusable.yml
@@ -156,6 +156,26 @@ jobs:
     - name: Install UR
       run: cmake --install ${{github.workspace}}/ur_build
 
+    - name: Checkout UMF
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      with:
+        repository: oneapi-src/unified-memory-framework
+        ref: main
+        path: umf-repo
+        fetch-depth: 1
+        fetch-tags: false
+
+    - name: Configure UMF
+      run: >
+        cmake -DCMAKE_BUILD_TYPE=Release
+        -S${{github.workspace}}/umf-repo
+        -B${{github.workspace}}/umf_build
+        -DUMF_BUILD_BENCHMARKS=ON
+        -DUMF_TESTS_FAIL_ON_SKIP=ON
+
+    - name: Build UMF
+      run: cmake --build ${{github.workspace}}/umf_build -j $(nproc)
+
     - name: Run benchmarks
       working-directory: ${{ github.workspace }}/ur-repo/
       id: benchmarks
@@ -164,6 +184,7 @@ jobs:
         ~/bench_workdir
         --sycl ${{ github.workspace }}/sycl_build
         --ur ${{ github.workspace }}/ur_install
+        --umf ${{ github.workspace }}/umf_build
         --adapter ${{ matrix.adapter.str_name }}
         ${{ inputs.upload_report && '--output-html' || '' }}
         ${{ inputs.bench_script_params }}
diff --git a/scripts/benchmarks/benches/base.py b/scripts/benchmarks/benches/base.py
index 31f2054d9a..abe15ca93c 100644
--- a/scripts/benchmarks/benches/base.py
+++ b/scripts/benchmarks/benches/base.py
@@ -26,7 +26,7 @@ def get_adapter_full_path():
         assert False, \
             f"could not find adapter file {adapter_path} (and in similar lib paths)"
 
-    def run_bench(self, command, env_vars, ld_library=[]):
+    def run_bench(self, command, env_vars, ld_library=[], add_sycl=True):
         env_vars_with_forced_adapter = env_vars.copy()
         if options.ur is not None:
             env_vars_with_forced_adapter.update(
@@ -35,7 +35,7 @@ def run_bench(self, command, env_vars, ld_library=[]):
         return run(
             command=command,
             env_vars=env_vars_with_forced_adapter,
-            add_sycl=True,
+            add_sycl=add_sycl,
             cwd=options.benchmark_cwd,
             ld_library=ld_library
         ).stdout.decode()
@@ -71,6 +71,9 @@ def run(self, env_vars) -> list[Result]:
     def teardown(self):
         raise NotImplementedError()
 
+    def stddev_threshold(self):
+        return None
+
 class Suite:
     def benchmarks(self) -> list[Benchmark]:
         raise NotImplementedError()
diff --git a/scripts/benchmarks/benches/options.py b/scripts/benchmarks/benches/options.py
index fa5d52ca8c..7ef7956c8e 100644
--- a/scripts/benchmarks/benches/options.py
+++ b/scripts/benchmarks/benches/options.py
@@ -12,6 +12,7 @@ class Options:
     sycl: str = None
     ur: str = None
     ur_adapter: str = None
+    umf: str = None
     rebuild: bool = True
     benchmark_cwd: str = "INVALID"
     timeout: float = 600
diff --git a/scripts/benchmarks/benches/umf.py b/scripts/benchmarks/benches/umf.py
new file mode 100644
index 0000000000..251cf15a93
--- /dev/null
+++ b/scripts/benchmarks/benches/umf.py
@@ -0,0 +1,158 @@
+# Copyright (C) 2024 Intel Corporation
+# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# See LICENSE.TXT
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import random
+from utils.utils import git_clone
+from .base import Benchmark, Suite
+from .result import Result
+from utils.utils import run, create_build_path
+from .options import options
+from .oneapi import get_oneapi
+import os
+import csv
+import io
+
+def isUMFAvailable():
+    return options.umf is not None
+
+class UMFSuite(Suite):
+    def __init__(self, directory):
+        self.directory = directory
+        if not isUMFAvailable():
+            print("UMF not provided. Related benchmarks will not run")
+
+    def setup(self):
+        if not isUMFAvailable():
+            return []
+        self.built = True
+
+    def benchmarks(self) -> list[Benchmark]:
+        if not isUMFAvailable():
+            return
+
+        benches = [
+            GBench(self),
+        ]
+
+        return benches
+
+class ComputeUMFBenchmark(Benchmark):
+    def __init__(self, bench, name):
+        self.bench = bench
+        self.bench_name = name
+        self.oneapi = get_oneapi()
+
+        self.col_name = None
+        self.col_iterations = None
+        self.col_real_time = None
+        self.col_cpu_time = None
+        self.col_time_unit = None
+
+        self.col_statistics_time = None
+
+        super().__init__(bench.directory)
+
+    def bin_args(self) -> list[str]:
+        return []
+
+    def extra_env_vars(self) -> dict:
+        return {}
+
+    def setup(self):
+        if not isUMFAvailable():
+            print("UMF prefix path not provided")
+            return
+
+        self.benchmark_bin = os.path.join(options.umf, 'benchmark', self.bench_name)
+
+    def run(self, env_vars) -> list[Result]:
+        command = [
+            f"{self.benchmark_bin}",
+        ]
+
+        command += self.bin_args()
+        env_vars.update(self.extra_env_vars())
+
+        result = self.run_bench(command, env_vars, add_sycl=False, ld_library=[self.oneapi.tbb_lib()])
+        parsed = self.parse_output(result)
+        results = []
+        for r in parsed:
+            (config, pool, mean) = r
+            label = f"{config} {pool}"
+            results.append(Result(label=label, value=mean, command=command, env=env_vars, stdout=result, unit="ns", explicit_group=config))
+        return results
+
+    # Implementation with self.col_* indices could lead to the division by None
+    def get_mean(self, datarow):
+        raise NotImplementedError()
+
+    def teardown(self):
+        return
+
+class GBench(ComputeUMFBenchmark):
+    def __init__(self, bench):
+        super().__init__(bench, "umf-benchmark")
+
+        self.col_name = 0
+        self.col_iterations = 1
+        self.col_real_time = 2
+        self.col_cpu_time = 3
+        self.col_time_unit = 4
+
+        self.idx_pool = 0
+        self.idx_config = 1
+        self.name_separator = '/'
+
+        self.col_statistics_time = self.col_real_time
+
+    def name(self):
+        return self.bench_name
+
+    # --benchmark_format describes stdout output
+    # --benchmark_out= and --benchmark_out_format=
+    # describe output to a file
+    def bin_args(self):
+        return ["--benchmark_format=csv"]
+
+    # the default unit
+    # might be changed globally with --benchmark_time_unit={ns|us|ms|s}
+    # the change affects only benchmark where time unit has not been set
+    # explicitly
+    def unit(self):
+        return "ns"
+
+    # these benchmarks are not stable, so set this at a large value
+    def stddev_threshold(self) -> float:
+        return 0.2 # 20%
+
+    def get_pool_and_config(self, full_name):
+        list_split = full_name.split(self.name_separator, 1)
+        if len(list_split) != 2:
+            raise ValueError("Incorrect benchmark name format: ", full_name)
+
+        return list_split[self.idx_pool], list_split[self.idx_config]
+
+    def get_mean(self, datarow):
+        return float(datarow[self.col_statistics_time])
+
+    def parse_output(self, output):
+        csv_file = io.StringIO(output)
+        reader = csv.reader(csv_file)
+
+        data_row = next(reader, None)
+        if data_row is None:
+            raise ValueError("Benchmark output does not contain data.")
+
+        results = []
+        for row in reader:
+            try:
+                full_name = row[self.col_name]
+                pool, config = self.get_pool_and_config(full_name)
+                mean = self.get_mean(row)
+                results.append((config, pool, mean))
+            except KeyError as e:
+                raise ValueError(f"Error parsing output: {e}")
+
+        return results
diff --git a/scripts/benchmarks/main.py b/scripts/benchmarks/main.py
index ab4adafee6..1b28ec702e 100755
--- a/scripts/benchmarks/main.py
+++ b/scripts/benchmarks/main.py
@@ -9,6 +9,7 @@
 from benches.velocity import VelocityBench
 from benches.syclbench import *
 from benches.llamacpp import *
+from benches.umf import *
 from benches.test import TestSuite
 from benches.options import Compare, options
 from output_markdown import generate_markdown
@@ -74,7 +75,7 @@ def remove_outliers(results: dict[str, list[Result]], threshold: float = 3.5) ->
 
     return new_results
 
-def process_results(results: dict[str, list[Result]]) -> tuple[bool, list[Result]]:
+def process_results(results: dict[str, list[Result]], stddev_threshold_override) -> tuple[bool, list[Result]]:
     processed: list[Result] = []
     # technically, we can detect whether result is below or above threshold per
     # individual result. However, we can't repeat benchmark runs with that
@@ -94,7 +95,7 @@ def process_results(results: dict[str, list[Result
         mean_value = statistics.mean(values)
         stddev = statistics.stdev(values)
 
-        threshold = options.stddev_threshold * mean_value
+        threshold = (stddev_threshold_override if stddev_threshold_override is not None else options.stddev_threshold) * mean_value
 
         if stddev > threshold:
             print(f"stddev {stddev} above the threshold {threshold} for {label}")
@@ -120,6 +121,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
         VelocityBench(directory),
         SyclBench(directory),
         LlamaCppBench(directory),
+        UMFSuite(directory),
         #TestSuite()
     ] if not options.dry_run else []
 
@@ -159,7 +161,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             processed: list[Result] = []
             for _ in range(5):
                 run_iterations(benchmark, merged_env_vars, options.iterations, intermediate_results)
-                valid, processed = process_results(intermediate_results)
+                valid, processed = process_results(intermediate_results, benchmark.stddev_threshold())
                 if valid:
                     break
             results += processed
@@ -231,6 +233,7 @@ def validate_and_parse_env_args(env_args):
     parser.add_argument('benchmark_directory', type=str, help='Working directory to setup benchmarks.')
     parser.add_argument('--sycl', type=str, help='Root directory of the SYCL compiler.', default=None)
     parser.add_argument('--ur', type=str, help='UR install prefix path', default=None)
+    parser.add_argument('--umf', type=str, help='UMF install prefix path', default=None)
    parser.add_argument('--adapter', type=str, help='Options to build the Unified Runtime as part of the benchmark', default="level_zero")
     parser.add_argument("--no-rebuild", help='Rebuild the benchmarks from scratch.', action="store_true")
     parser.add_argument("--env", type=str, help='Use env variable for a benchmark run.', action="append", default=[])
@@ -267,6 +270,7 @@ def validate_and_parse_env_args(env_args):
     options.output_html = args.output_html
     options.output_markdown = args.output_markdown
     options.dry_run = args.dry_run
+    options.umf = args.umf
 
     benchmark_filter = re.compile(args.filter) if args.filter else None
 
diff --git a/scripts/benchmarks/output_html.py b/scripts/benchmarks/output_html.py
index 8bcda148b1..7a8c4af3fb 100644
--- a/scripts/benchmarks/output_html.py
+++ b/scripts/benchmarks/output_html.py
@@ -157,6 +157,9 @@ def create_grouped_bar_charts(groups: list[ExplicitGroup]) -> list[BenchmarkChar
             ax.bar_label(rects, fmt='')
 
             for rect, run, res in zip(rects, run_results.keys(), run_results.values()):
+                if res is None:
+                    continue
+
                 height = rect.get_height()
                 if height > max_height:
                     max_height = height
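A note on the new GBench parser and the --umf flag: GBench.parse_output consumes the CSV that Google Benchmark prints when the umf-benchmark binary is run with --benchmark_format=csv. Below is a minimal sketch of that flow, using a hypothetical benchmark name (the exact names emitted by umf-benchmark may differ):

import csv
import io

# Hypothetical sample of `umf-benchmark --benchmark_format=csv` output:
# a header row, then one data row per benchmark; real_time is column index 2.
sample = (
    "name,iterations,real_time,cpu_time,time_unit\n"
    "disjoint_pool/alloc/size:4096,1000,125.0,124.0,ns\n"
)

reader = csv.reader(io.StringIO(sample))
next(reader)  # skip the header row, as parse_output does
for row in reader:
    pool, config = row[0].split('/', 1)  # idx_pool=0, idx_config=1
    mean = float(row[2])                 # col_statistics_time == col_real_time
    print(f"{config} {pool}", mean, "ns")  # matches the Result label and unit above

Locally, the path exercised by the updated workflow step can be reproduced with something like (the build paths below are placeholders):

scripts/benchmarks/main.py ~/bench_workdir --sycl <sycl_build> --ur <ur_install> --umf <umf_build> --adapter level_zero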