From 14ae1887b401639a8e6d441f9a7a7770d6afe9d0 Mon Sep 17 00:00:00 2001 From: Agata Momot Date: Sun, 1 Dec 2024 20:07:30 +0100 Subject: [PATCH] Testing, testing, testing --- .github/workflows/bandit.yml | 30 -- .github/workflows/benchmarks-nightly.yml | 38 -- .github/workflows/benchmarks-reusable.yml | 27 +- .github/workflows/benchmarks.yml | 1 + .github/workflows/build-fuzz-reusable.yml | 75 --- .github/workflows/build-hw-reusable.yml | 125 ----- .github/workflows/cmake.yml | 335 -------------- .github/workflows/codeql.yml | 81 ---- .github/workflows/coverity.yml | 81 ---- .github/workflows/docs.yml | 81 ---- .github/workflows/e2e_core.yml | 214 --------- .github/workflows/e2e_cuda.yml | 24 - .github/workflows/e2e_level_zero.yml | 31 -- .github/workflows/e2e_opencl.yml | 24 - .github/workflows/examples-hw-level-zero.yml | 69 --- .github/workflows/labeler.yml | 23 - .github/workflows/multi_device.yml | 66 --- .github/workflows/nightly.yml | 17 - .github/workflows/prerelease.yml | 24 - .github/workflows/scorecard.yml | 60 --- .github/workflows/source-checks.yml | 71 --- .github/workflows/trivy.yml | 50 -- scripts/benchmarks/benches/base.py | 23 +- scripts/benchmarks/benches/compute.py | 77 ++- scripts/benchmarks/benches/llamacpp.py | 78 +--- scripts/benchmarks/benches/oneapi.py | 86 ++++ scripts/benchmarks/benches/options.py | 7 +- scripts/benchmarks/benches/result.py | 6 +- scripts/benchmarks/benches/test.py | 19 +- scripts/benchmarks/benches/umf.py | 172 +++++++ scripts/benchmarks/benches/velocity.py | 122 ++++- scripts/benchmarks/main.py | 154 ++++-- scripts/benchmarks/output_html.py | 463 ++++++++++--------- scripts/benchmarks/utils/utils.py | 9 +- 34 files changed, 857 insertions(+), 1906 deletions(-) delete mode 100644 .github/workflows/bandit.yml delete mode 100644 .github/workflows/benchmarks-nightly.yml delete mode 100644 .github/workflows/build-fuzz-reusable.yml delete mode 100644 .github/workflows/build-hw-reusable.yml delete mode 100644 .github/workflows/cmake.yml delete mode 100644 .github/workflows/codeql.yml delete mode 100644 .github/workflows/coverity.yml delete mode 100644 .github/workflows/docs.yml delete mode 100644 .github/workflows/e2e_core.yml delete mode 100644 .github/workflows/e2e_cuda.yml delete mode 100644 .github/workflows/e2e_level_zero.yml delete mode 100644 .github/workflows/e2e_opencl.yml delete mode 100644 .github/workflows/examples-hw-level-zero.yml delete mode 100644 .github/workflows/labeler.yml delete mode 100644 .github/workflows/multi_device.yml delete mode 100644 .github/workflows/nightly.yml delete mode 100644 .github/workflows/prerelease.yml delete mode 100644 .github/workflows/scorecard.yml delete mode 100644 .github/workflows/source-checks.yml delete mode 100644 .github/workflows/trivy.yml create mode 100644 scripts/benchmarks/benches/oneapi.py create mode 100644 scripts/benchmarks/benches/umf.py diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml deleted file mode 100644 index 124046d258..0000000000 --- a/.github/workflows/bandit.yml +++ /dev/null @@ -1,30 +0,0 @@ -# Runs bandit security checker for code written in Python. 
-name: Bandit - -on: [push, pull_request, workflow_dispatch] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - bandit: - name: Bandit - strategy: - matrix: - os: [ubuntu-latest, windows-latest] - runs-on: ${{matrix.os}} - - steps: - - name: Clone the git repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Install pip packages - run: pip install -r third_party/requirements.txt - - # Scan all files, except for dev. scripts - - name: Run Bandit - run: bandit -r . -x ./scripts/ diff --git a/.github/workflows/benchmarks-nightly.yml b/.github/workflows/benchmarks-nightly.yml deleted file mode 100644 index 3da0d09c7a..0000000000 --- a/.github/workflows/benchmarks-nightly.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: Compute Benchmarks Nightly - -on: - schedule: - - cron: '0 0 * * *' # Runs at midnight UTC every day - -permissions: - contents: read - pull-requests: write - -jobs: - nightly: - name: Compute Benchmarks Nightly level-zero - uses: ./.github/workflows/benchmarks-reusable.yml - with: - str_name: 'level_zero' - unit: 'gpu' - pr_no: 0 - bench_script_params: '--save baseline' - sycl_config_params: '' - sycl_repo: 'intel/llvm' - sycl_commit: '' - - nightly2: - # we need to wait until previous job is done so that the html report - # contains both runs - needs: nightly - name: Compute Benchmarks Nightly level-zero v2 - uses: ./.github/workflows/benchmarks-reusable.yml - with: - str_name: 'level_zero_v2' - unit: 'gpu' - pr_no: 0 - bench_script_params: '--save baseline-v2' - sycl_config_params: '' - sycl_repo: 'intel/llvm' - sycl_commit: '' - upload_report: true diff --git a/.github/workflows/benchmarks-reusable.yml b/.github/workflows/benchmarks-reusable.yml index 79cb35748e..ccf1a12b74 100644 --- a/.github/workflows/benchmarks-reusable.yml +++ b/.github/workflows/benchmarks-reusable.yml @@ -90,6 +90,8 @@ jobs: pip install --force-reinstall -r ${{github.workspace}}/ur-repo/third_party/benchmark_requirements.txt # We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged. 
+ + # REMOVE start - name: Fetch PR's merge commit if: ${{ inputs.pr_no != 0 }} working-directory: ${{github.workspace}}/ur-repo @@ -100,6 +102,7 @@ jobs: git checkout origin/pr/${PR_NO}/merge git rev-parse origin/pr/${PR_NO}/merge +# HERE - name: Checkout SYCL uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: @@ -155,6 +158,27 @@ jobs: - name: Install UR run: cmake --install ${{github.workspace}}/ur_build + # # remove end + + - name: Checkout UMF + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + repository: EuphoricThinking/unified-memory-framework + ref: 6aef9bf11b47d0b1390209aeafb6998f0da475c9 + path: umf-repo + fetch-depth: 1 + fetch-tags: false + + - name: Configure UMF + run: > + cmake -DCMAKE_BUILD_TYPE=Release + -S${{github.workspace}}/umf-repo + -B${{github.workspace}}/umf_build + -DUMF_BUILD_BENCHMARKS=ON + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build UMF + run: cmake --build ${{github.workspace}}/umf_build -j $(nproc) - name: Run benchmarks working-directory: ${{ github.workspace }}/ur-repo/ @@ -164,6 +188,7 @@ jobs: ~/bench_workdir --sycl ${{ github.workspace }}/sycl_build --ur ${{ github.workspace }}/ur_install + --umf ${{ github.workspace }}/umf_build --adapter ${{ matrix.adapter.str_name }} ${{ inputs.upload_report && '--output-html' || '' }} ${{ inputs.bench_script_params }} @@ -199,5 +224,5 @@ jobs: if: ${{ always() && inputs.upload_report }} uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: - path: ur-repo/benchmark_results.html + path: umf-repo/benchmark_results.html key: benchmark-results-${{ matrix.adapter.str_name }}-${{ github.run_id }} diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index af62d40e85..07ceb1b0ff 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -1,4 +1,5 @@ name: Compute Benchmarks +# for tests on: workflow_dispatch: diff --git a/.github/workflows/build-fuzz-reusable.yml b/.github/workflows/build-fuzz-reusable.yml deleted file mode 100644 index 2cbd1b87ff..0000000000 --- a/.github/workflows/build-fuzz-reusable.yml +++ /dev/null @@ -1,75 +0,0 @@ ---- -name: Build - Fuzztests on L0 HW - Reusable - -on: - workflow_call: - inputs: - test_label: - required: true - type: string - -permissions: - contents: read - -jobs: - fuzztest-build-hw: - name: Build and run fuzz tests on L0 HW - if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks won't have the HW - strategy: - matrix: - build_type: [Debug, Release] - compiler: [{c: clang, cxx: clang++}] - - runs-on: 'FUZZTESTS' - # In order to use sanitizers, vm.mmap_rnd_bits=28 must be set in the system, - # otherwise random SEGV at the start of the test occurs. - # Alternatively, clang 18.1.0 onwards with fixed sanitizers behavior can be used, - # if available. - # TODO: Remove this advice once clang 18.1.0 is available in the system (like ie. as an apt package). 
- - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Install pip packages - run: pip install -r third_party/requirements.txt - - - name: Download DPC++ - run: | - wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-01-29/sycl_linux.tar.gz - mkdir dpcpp_compiler - tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C dpcpp_compiler - - - name: Build level zero with gcc - run: | - git clone -b v1.18.5 --depth=1 https://github.com/oneapi-src/level-zero.git ${{github.workspace}}/level-zero - cd ${{github.workspace}}/level-zero - cmake -B build -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ - cmake --build build -j $(nproc) - - - name: Configure CMake - run: > - cmake - -B${{github.workspace}}/build - -DCMAKE_C_COMPILER=${{matrix.compiler.c}} - -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} - -DUR_ENABLE_TRACING=ON - -DCMAKE_BUILD_TYPE=${{matrix.build_type}} - -DUR_BUILD_TESTS=ON - -DUR_USE_ASAN=ON - -DUR_USE_UBSAN=ON - -DUR_BUILD_ADAPTER_L0=ON - -DUR_LEVEL_ZERO_LOADER_LIBRARY=${{github.workspace}}/level-zero/build/lib/libze_loader.so - -DUR_LEVEL_ZERO_INCLUDE_DIR=${{github.workspace}}/level-zero/include/ - -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++ - -DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib - - - name: Build - run: cmake --build ${{github.workspace}}/build -j $(nproc) - - - name: Fuzz test - working-directory: ${{github.workspace}}/build - run: ctest -C ${{matrix.build_type}} --output-on-failure -L "${{inputs.test_label}}" --verbose - - - name: Get information about platform - if: ${{ always() }} - run: .github/scripts/get_system_info.sh diff --git a/.github/workflows/build-hw-reusable.yml b/.github/workflows/build-hw-reusable.yml deleted file mode 100644 index 3e332c73fc..0000000000 --- a/.github/workflows/build-hw-reusable.yml +++ /dev/null @@ -1,125 +0,0 @@ ---- -name: Build - Adapters on HW - Reusable - -on: - workflow_call: - inputs: - adapter_name: - required: true - type: string - other_adapter_name: - required: false - type: string - default: "" - runner_name: - required: true - type: string - platform: - description: "Platform string, `UR_CTS_ADAPTER_PLATFORM` will be set to this." - required: false - type: string - default: "" - static_loader: - required: false - type: string - default: OFF - static_adapter: - required: false - type: string - default: OFF - -permissions: - contents: read - -env: - UR_LOG_CUDA: "level:error;flush:error" - UR_LOG_HIP: "level:error;flush:error" - UR_LOG_LEVEL_ZERO: "level:error;flush:error" - UR_LOG_NATIVE_CPU: "level:error;flush:error" - UR_LOG_OPENCL: "level:error;flush:error" - -jobs: - adapter-build-hw: - name: Build & Test HW - if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks won't have the HW - strategy: - matrix: - adapter: [{ - name: "${{inputs.adapter_name}}", - other_name: "${{inputs.other_adapter_name}}", - platform: "${{inputs.platform}}", - static_Loader: "${{inputs.static_loader}}", - static_adapter: "${{inputs.static_loader}}" - }] - build_type: [Debug, Release] - compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: clang++}] - # TODO: The latest L0 loader segfaults when built with clang. - exclude: - - adapter: {name: L0, platform: ""} - compiler: {c: clang, cxx: clang++} - # Exclude these configurations to avoid overloading the runners. 
- - adapter: {static_Loader: ON} - build_type: Release - - adapter: {static_Loader: ON} - compiler: {c: clang, cxx: clang++} - - adapter: {static_adapter: ON} - build_type: Release - - adapter: {static_adapter: ON} - compiler: {c: clang, cxx: clang++} - - runs-on: ${{inputs.runner_name}} - - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Install pip packages - run: pip install -r third_party/requirements.txt - - - name: Download DPC++ - run: | - wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-01-29/sycl_linux.tar.gz - mkdir dpcpp_compiler - tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C dpcpp_compiler - - - name: Configure CMake - run: > - cmake - -B${{github.workspace}}/build - -DCMAKE_C_COMPILER=${{matrix.compiler.c}} - -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} - -DCMAKE_BUILD_TYPE=${{matrix.build_type}} - -DUR_ENABLE_TRACING=ON - -DUR_DEVELOPER_MODE=ON - -DUR_BUILD_TESTS=ON - -DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON - -DUR_CONFORMANCE_TEST_LOADER=${{ matrix.adapter.other_name != '' && 'ON' || 'OFF' }} - ${{ matrix.adapter.other_name != '' && format('-DUR_BUILD_ADAPTER_{0}=ON', matrix.adapter.other_name) || '' }} - -DUR_STATIC_LOADER=${{matrix.adapter.static_Loader}} - -DUR_STATIC_ADAPTER_${{matrix.adapter.name}}=${{matrix.adapter.static_adapter}} - -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++ - -DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib - -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/install - ${{ matrix.adapter.name == 'HIP' && '-DUR_CONFORMANCE_AMD_ARCH=gfx1030' || '' }} - ${{ matrix.adapter.name == 'HIP' && '-DUR_HIP_PLATFORM=AMD' || '' }} - - - name: Build - # This is so that device binaries can find the sycl runtime library - run: cmake --build ${{github.workspace}}/build -j $(nproc) - - - name: Install - # This is to check that install command does not fail - run: cmake --install ${{github.workspace}}/build - - - name: Test adapter specific - working-directory: ${{github.workspace}}/build - run: ctest -C ${{matrix.build_type}} --output-on-failure -L "adapter-specific" --timeout 180 - # Don't run adapter specific tests when building multiple adapters - if: ${{ matrix.adapter.other_name == '' }} - - - name: Test adapters - working-directory: ${{github.workspace}}/build - run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" --timeout 180 - - - name: Get information about platform - if: ${{ always() }} - run: .github/scripts/get_system_info.sh diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml deleted file mode 100644 index 0a4ae99a58..0000000000 --- a/.github/workflows/cmake.yml +++ /dev/null @@ -1,335 +0,0 @@ -name: Build and test - -on: [push, pull_request] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read - pull-requests: write - -jobs: - ubuntu-build: - name: Build - Ubuntu - strategy: - matrix: - os: ['ubuntu-20.04', 'ubuntu-22.04'] - build_type: [Debug, Release] - compiler: [{c: gcc, cxx: g++}] - libbacktrace: ['-DVAL_USE_LIBBACKTRACE_BACKTRACE=OFF'] - pool_tracking: ['-DUMF_ENABLE_POOL_TRACKING=ON', '-DUMF_ENABLE_POOL_TRACKING=OFF'] - latency_tracking: ['-DUR_ENABLE_LATENCY_HISTOGRAM=OFF'] - include: - - os: 'ubuntu-22.04' - build_type: Release - compiler: {c: clang, cxx: clang++} - libbacktrace: 
'-DVAL_USE_LIBBACKTRACE_BACKTRACE=OFF' - - os: 'ubuntu-22.04' - build_type: Release - compiler: {c: gcc, cxx: g++} - libbacktrace: '-DVAL_USE_LIBBACKTRACE_BACKTRACE=ON' - - os: 'ubuntu-22.04' - build_type: Release - compiler: {c: clang, cxx: clang++} - libbacktrace: '-DVAL_USE_LIBBACKTRACE_BACKTRACE=ON' - - os: 'ubuntu-20.04' - build_type: Release - compiler: {c: gcc-7, cxx: g++-7} - - os: 'ubuntu-22.04' - build_type: Release - compiler: {c: clang, cxx: clang++} - latency_tracking: '-DUR_ENABLE_LATENCY_HISTOGRAM=ON' - runs-on: ${{ (matrix.os == 'ubuntu-22.04' && github.repository_owner == 'oneapi-src') && 'intel-ubuntu-22.04' || matrix.os }} - - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Install apt packages - run: | - sudo apt-get update - sudo apt-get install -y ${{matrix.compiler.c}} devscripts - - - name: Install libhwloc - run: .github/scripts/install_hwloc.sh - - - name: Setup PATH - run: echo "$HOME/.local/bin" >> $GITHUB_PATH - - - name: Install g++-7 - if: matrix.compiler.cxx == 'g++-7' - run: | - sudo apt-get install -y ${{matrix.compiler.cxx}} - - - name: Install libbacktrace - if: matrix.libbacktrace == '-DVAL_USE_LIBBACKTRACE_BACKTRACE=ON' - run: | - git clone https://github.com/ianlancetaylor/libbacktrace.git - cd libbacktrace - ./configure - make - sudo make install - cd .. - - - name: Download DPC++ - if: matrix.os == 'ubuntu-22.04' - run: | - sudo apt install libncurses5 - wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-09-27/sycl_linux.tar.gz - mkdir -p ${{github.workspace}}/dpcpp_compiler - tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C ${{github.workspace}}/dpcpp_compiler - - - name: Configure CMake - if: matrix.os == 'ubuntu-22.04' - # WEXTRA: https://github.com/oneapi-src/unified-runtime/issues/2109 - run: > - cmake - -B${{github.workspace}}/build - -DCMAKE_C_COMPILER=${{matrix.compiler.c}} - -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} - -DUR_ENABLE_TRACING=ON - -DCMAKE_BUILD_TYPE=${{matrix.build_type}} - -DUR_BUILD_TESTS=ON - -DUR_FORMAT_CPP_STYLE=OFF - -DUR_DEVELOPER_MODE=ON - -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++ - -DUR_CONFORMANCE_TEST_LOADER=OFF - ${{matrix.libbacktrace}} - ${{matrix.pool_tracking}} - ${{matrix.latency_tracking}} - - - name: Configure CMake - if: matrix.os == 'ubuntu-20.04' - # WEXTRA: https://github.com/oneapi-src/unified-runtime/issues/2109 - # Note: Disable Werror, since 20.04 raises different ones than 22.04 - run: > - cmake - -B${{github.workspace}}/build - -DCMAKE_C_COMPILER=${{matrix.compiler.c}} - -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} - -DUR_ENABLE_TRACING=ON - -DCMAKE_BUILD_TYPE=${{matrix.build_type}} - -DUR_BUILD_TESTS=ON - -DUR_FORMAT_CPP_STYLE=OFF - -DUR_DEVELOPER_MODE=OFF - ${{matrix.libbacktrace}} - ${{matrix.pool_tracking}} - ${{matrix.latency_tracking}} - - - name: Build - run: cmake --build ${{github.workspace}}/build -j $(nproc) - - - name: Verify hardening flags have been set - run: cmake --build ${{github.workspace}}/build --target verify-hardening - # https://github.com/oneapi-src/unified-runtime/issues/2120 - if: ${{ matrix.compiler.cxx != 'clang++' && matrix.os != 'ubuntu-20.04' }} - - - name: Test - working-directory: ${{github.workspace}}/build - run: ctest -C ${{matrix.build_type}} --output-on-failure -L "umf|loader|validation|tracing|unit|urtrace" - - fuzztest: - name: Fuzz tests short - uses: ./.github/workflows/build-fuzz-reusable.yml - with: - test_label: 
"fuzz-short" - - level-zero: - name: Level Zero - uses: ./.github/workflows/build-hw-reusable.yml - with: - adapter_name: L0 - runner_name: L0 - - level-zero-v2: - name: Level Zero V2 - uses: ./.github/workflows/build-hw-reusable.yml - with: - adapter_name: L0_V2 - runner_name: L0 - - level-zero-static: - name: Level Zero static - uses: ./.github/workflows/build-hw-reusable.yml - with: - adapter_name: L0 - runner_name: L0 - static_loader: ON - static_adapter: ON - - opencl: - name: OpenCL - uses: ./.github/workflows/build-hw-reusable.yml - with: - adapter_name: OPENCL - runner_name: OPENCL - platform: "Intel(R) OpenCL" - - cuda: - name: CUDA - uses: ./.github/workflows/build-hw-reusable.yml - with: - adapter_name: CUDA - runner_name: CUDA - - hip: - name: HIP - uses: ./.github/workflows/build-hw-reusable.yml - with: - adapter_name: HIP - runner_name: HIP - - native-cpu: - name: Native CPU - uses: ./.github/workflows/build-hw-reusable.yml - with: - adapter_name: NATIVE_CPU - runner_name: NATIVE_CPU - - # Native CPU jobs are here to force the loader to be used (UR will not use the loader if there is only one target) - combined-opencl-native-cpu: - name: OpenCL + Native CPU (Loader) - uses: ./.github/workflows/build-hw-reusable.yml - with: - adapter_name: OPENCL - other_adapter_name: NATIVE_CPU - runner_name: OPENCL - platform: "OPENCL:Intel(R) OpenCL" - - combined-level-zero-native-cpu: - name: Level Zero + Native CPU (Loader) - uses: ./.github/workflows/build-hw-reusable.yml - with: - adapter_name: L0 - other_adapter_name: NATIVE_CPU - runner_name: L0 - - e2e-level-zero: - name: E2E L0 - permissions: - contents: read - pull-requests: write - needs: [ubuntu-build, level-zero] - uses: ./.github/workflows/e2e_level_zero.yml - - e2e-opencl: - name: E2E OpenCL - permissions: - contents: read - pull-requests: write - needs: [ubuntu-build, opencl] - uses: ./.github/workflows/e2e_opencl.yml - - # Causes hangs: https://github.com/oneapi-src/unified-runtime/issues/2398 - #e2e-cuda: - # name: E2E CUDA - # permissions: - # contents: read - # pull-requests: write - # needs: [ubuntu-build, cuda] - # uses: ./.github/workflows/e2e_cuda.yml - - windows-build: - name: Build - Windows - strategy: - matrix: - os: ['windows-2019', 'windows-2022'] - adapter: [ - {name: None, var: ''}, {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'}, - {name: None, var: ''}, {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'}, - {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} - ] - - # TODO: building level zero loader on windows-2019 and clang-cl is currently broken - exclude: - - os: 'windows-2019' - adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'} - - os: 'windows-2019' - adapter: {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'} - - os: 'windows-2019' - adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} - - adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'} - compiler: {c: clang-cl, cxx: clang-cl} - - adapter: {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'} - compiler: {c: clang-cl, cxx: clang-cl} - - adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} - compiler: {c: clang-cl, cxx: clang-cl} - - build_type: [Debug, Release] - # TODO: clang-cl seems to be fully broken (https://github.com/oneapi-src/unified-runtime/issues/2348) - #compiler: [{c: cl, cxx: cl}, {c: clang-cl, cxx: clang-cl}] - compiler: [{c: cl, cxx: cl}] - include: - #- compiler: {c: clang-cl, cxx: clang-cl} - # toolset: "-T ClangCL" - - os: 'windows-2022' - adapter: {name: L0, var: 
'-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} - build_type: 'Release' - compiler: {c: cl, cxx: cl} - - runs-on: ${{matrix.os}} - - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Install hwloc - run: vcpkg install hwloc:x64-windows - - - name: Configure CMake - env: - VCPKG_PATH: "C:/vcpkg/packages/hwloc_x64-windows" - run: > - cmake - -B${{github.workspace}}/build - ${{matrix.toolset}} - -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" - -DCMAKE_C_COMPILER=${{matrix.compiler.c}} - -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} - -DCMAKE_POLICY_DEFAULT_CMP0094=NEW - -DUR_ENABLE_TRACING=ON - -DUR_DEVELOPER_MODE=ON - -DUR_BUILD_TESTS=ON - -DUR_FORMAT_CPP_STYLE=OFF - -DUR_CONFORMANCE_TEST_LOADER=OFF - ${{matrix.adapter.var}} - - - name: Build all - run: cmake --build ${{github.workspace}}/build --config ${{matrix.build_type}} -j $Env:NUMBER_OF_PROCESSORS - - - name: Test - working-directory: ${{github.workspace}}/build - run: ctest -C ${{matrix.build_type}} --output-on-failure -L "umf|loader|validation|tracing|unit|urtrace" - - macos-build: - name: Build - MacOS - strategy: - matrix: - os: ['macos-13'] - runs-on: ${{matrix.os}} - - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 - with: - python-version: 3.9 - - - name: Install prerequisites - run: python3 -m pip install -r third_party/requirements.txt - - - name: Install hwloc - run: brew install hwloc - - - name: Configure CMake - run: > - cmake - -B${{github.workspace}}/build - -DUR_ENABLE_TRACING=ON - -DUR_DEVELOPER_MODE=ON - -DCMAKE_BUILD_TYPE=Release - -DUR_BUILD_TESTS=ON - -DUR_FORMAT_CPP_STYLE=ON - -DUMF_ENABLE_POOL_TRACKING=ON - - name: Build - run: cmake --build ${{github.workspace}}/build -j $(sysctl -n hw.logicalcpu) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml deleted file mode 100644 index fdc5d0c0c0..0000000000 --- a/.github/workflows/codeql.yml +++ /dev/null @@ -1,81 +0,0 @@ -name: "CodeQL" - -on: [push] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - analyze-ubuntu: - name: Analyze on Ubuntu - runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }} - permissions: - security-events: write - - strategy: - fail-fast: false - - steps: - - name: Checkout repository - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Initialize CodeQL - uses: github/codeql-action/init@f079b8493333aace61c81488f8bd40919487bd9f # v3.25.7 - with: - languages: cpp, python - - - name: Install pip packages - run: pip install -r third_party/requirements.txt - - - name: Install apt packages - run: | - sudo apt-get update - sudo apt-get install -y libhwloc-dev - - - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DUR_DEVELOPER_MODE=ON -DUR_BUILD_TESTS=ON -DUR_ENABLE_TRACING=ON -DUR_BUILD_TOOLS=ON -DUMF_ENABLE_POOL_TRACKING=ON - - - name: Build - run: cmake --build ${{github.workspace}}/build -j $(nproc) - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@f079b8493333aace61c81488f8bd40919487bd9f # v3.25.7 - - analyze-windows: - name: Analyze on Windows - runs-on: windows-latest - permissions: - security-events: write - - strategy: - fail-fast: false - - steps: - - name: Checkout repository - uses: 
actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Initialize CodeQL - uses: github/codeql-action/init@f079b8493333aace61c81488f8bd40919487bd9f # v3.25.7 - with: - languages: cpp, python - - - name: Install pip packages - run: python3 -m pip install -r third_party/requirements.txt - - - name: Install hwloc - run: vcpkg install hwloc:x64-windows - - - name: Configure CMake - env: - VCPKG_PATH: "C:/vcpkg/packages/hwloc_x64-windows" - run: cmake -B ${{github.workspace}}/build -DCMAKE_POLICY_DEFAULT_CMP0094=NEW -DUR_DEVELOPER_MODE=ON -DUR_BUILD_TESTS=ON -DUR_ENABLE_TRACING=ON -DUR_BUILD_TOOLS=ON -DUMF_ENABLE_POOL_TRACKING=ON -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" - - - name: Build - run: cmake --build ${{github.workspace}}/build -j $(nproc) --config Release - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@f079b8493333aace61c81488f8bd40919487bd9f # v3.25.7 diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml deleted file mode 100644 index d7d8bf937b..0000000000 --- a/.github/workflows/coverity.yml +++ /dev/null @@ -1,81 +0,0 @@ -# Coverity - static analysis build. It requires Coverity's token (set in CI's secret). -name: coverity-unified-runtime - -on: - workflow_dispatch: - schedule: - # Run every day at 22:00 UTC - - cron: '0 22 * * *' - -permissions: - contents: read - -jobs: - coverity: - name: Coverity - # run only on upstream; forks don't have token for upstream's cov project - if: github.repository == 'oneapi-src/unified-runtime' - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - with: - fetch-depth: 0 - - - name: Install dependencies - run: | - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb - sudo dpkg -i cuda-keyring_1.1-1_all.deb - sudo apt-get update - sudo apt-get install -y libhwloc-dev libtbb-dev cuda-toolkit-12-6 - - - name: Install pip packages - run: pip install -r third_party/requirements.txt - - - name: Download Coverity - run: | - wget -O coverity_tool.tgz -nv https://scan.coverity.com/download/linux64 \ - --post-data "token=${{ secrets.COVERITY_SCAN_TOKEN }}&project=oneapi-src%2Funified-runtime" - - - name: Extract Coverity - run: tar xzf coverity_tool.tgz - - # TODO: enable HIP adapter as well (requires proper package(s) installation) - - name: Configure CMake - run: > - cmake - -B ${{github.workspace}}/build - -DCMAKE_BUILD_TYPE=Release - -DUR_DEVELOPER_MODE=OFF - -DUR_FORMAT_CPP_STYLE=ON - -DUR_ENABLE_TRACING=ON - -DUR_BUILD_TESTS=ON - -DUR_BUILD_ADAPTER_L0=ON - -DUR_BUILD_ADAPTER_CUDA=ON - -DCUDA_CUDA_LIBRARY=/usr/local/cuda-12.6/targets/x86_64-linux/lib/stubs/libcuda.so - -DUR_BUILD_ADAPTER_NATIVE_CPU=ON - -DUR_BUILD_ADAPTER_HIP=OFF - -DUR_BUILD_ADAPTER_OPENCL=ON - - - name: Build - run: | - export COVERITY_DIR=$(find . 
-maxdepth 1 -type d -name "cov-analysis-linux64-*" | head -n 1) - if [ -n "$COVERITY_DIR" ]; then - export PATH="$PATH:$COVERITY_DIR/bin" - fi - cov-build --dir ${{github.workspace}}/cov-int cmake --build ${{github.workspace}}/build --config Release -j$(nproc) - - - name: Create tarball to analyze - run: tar czvf cov-int_ur.tgz cov-int - - - name: Push tarball to scan - run: | - BRANCH_NAME=$(echo ${GITHUB_REF_NAME}) - COMMIT_ID=$(echo $GITHUB_SHA) - curl --form token=${{ secrets.COVERITY_SCAN_TOKEN }} \ - --form email=bb-ur@intel.com \ - --form file=@cov-int_ur.tgz \ - --form version="$COMMIT_ID" \ - --form description="$BRANCH_NAME:$COMMIT_ID" \ - https://scan.coverity.com/builds\?project\=oneapi-src%2Funified-runtime diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml deleted file mode 100644 index b4c40334d4..0000000000 --- a/.github/workflows/docs.yml +++ /dev/null @@ -1,81 +0,0 @@ -# Simple workflow for deploying static content to GitHub Pages -name: Deploy documentation to Pages - -on: - # Runs on pushes targeting the default branch - push: - branches: ["main"] - - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages -permissions: - contents: read - pages: write - id-token: write - -# Allow one concurrent deployment -concurrency: - group: "pages" - cancel-in-progress: true - -jobs: - # Build job - build: - runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }} - steps: - - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 - with: - python-version: 3.9 - - - name: Install apt package - run: | - sudo apt-get update - sudo apt-get install -y doxygen - - - name: Install prerequisites - run: python3 -m pip install -r third_party/requirements.txt - - - name: Setup Pages - uses: actions/configure-pages@1f0c5cde4bc74cd7e1254d0cb4de8d49e9068c7d # v4.0.0 - - - name: Build Documentation - working-directory: ${{github.workspace}}/scripts - run: | - python3 run.py --core - mkdir -p ${{ github.workspace }}/ur-repo/ - mkdir -p ${{github.workspace}}/docs/html - - - name: Download benchmark HTML - id: download-bench-html - uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - with: - path: ur-repo/benchmark_results.html - key: benchmark-results- - - - name: Move benchmark HTML - # exact or partial cache hit - if: steps.download-bench-html.outputs.cache-hit != '' - run: | - mv ${{ github.workspace }}/ur-repo/benchmark_results.html ${{ github.workspace }}/docs/html/ - - - name: Upload artifact - uses: actions/upload-pages-artifact@0252fc4ba7626f0298f0cf00902a25c6afc77fa8 # v3.0.0 - with: - path: ${{github.workspace}}/docs/html - - # Deployment job - deploy: - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }} - needs: build - steps: - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@87c3283f01cd6fe19a0ab93a23b2f6fcba5a8e42 # v4.0.3 diff --git a/.github/workflows/e2e_core.yml b/.github/workflows/e2e_core.yml deleted file mode 100644 index f12913c648..0000000000 --- a/.github/workflows/e2e_core.yml +++ /dev/null @@ -1,214 +0,0 @@ -name: E2E build & run - -on: - # this workflow can by only triggered by other workflows - # for example 
by: e2e_cuda.yml or e2e_opencl.yml - workflow_call: - # acceptable input from adapter-specific workflows - inputs: - name: - description: Adapter name - type: string - required: true - str_name: - description: Formatted adapter name - type: string - required: true - prefix: - description: Prefix for cmake parameter - type: string - required: true - config: - description: Params for sycl configuration - type: string - required: true - unit: - description: Test unit (cpu/gpu) - type: string - required: true - runner_tag: - description: Tag defifned for the runner - type: string - required: true - xfail: - description: Allow test failures - type: string - required: false - xfail_not: - description: Not xfail - type: string - required: false - filter_out: - description: Tests to filter out completely - type: string - required: false - extra_lit_flags: - description: Additional llvm-lit flags to use - type: string - required: false - -permissions: - contents: read - pull-requests: write - -jobs: - changed-files: - name: Check for changed files - runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }} - outputs: - any_changed: ${{ steps.get-changed.outputs.any_changed }} - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - name: Get changed files - id: get-changed - uses: tj-actions/changed-files@d6babd6899969df1a11d14c368283ea4436bca78 # v44.5.2 - with: - files: | - source/adapters/${{inputs.str_name}}/** - source/loader/** - .github/workflows/e2e* - - e2e-build-hw: - # We want to run the job only if there are changes in the specific adapter - if: needs.changed-files.outputs.any_changed == 'true' - name: Build SYCL, UR, run E2E - needs: changed-files - permissions: - contents: read - pull-requests: write - - # Allow failures, since SYCL tests and API may be not stable - continue-on-error: true - strategy: - matrix: - adapter: [ - {name: "${{inputs.name}}", - str_name: "${{inputs.str_name}}", - prefix: "${{inputs.prefix}}", - config: "${{inputs.config}}", - unit: "${{inputs.unit}}", - extra_lit_flags: "${{inputs.extra_lit_flags}}"}, - ] - build_type: [Release] - compiler: [{c: clang, cxx: clang++}] - - runs-on: ${{inputs.runner_tag}} - - steps: - # Workspace on self-hosted runners is not cleaned automatically. - # We have to delete the files created outside of using actions. 
- - name: Cleanup self-hosted workspace - if: always() - run: | - ls -la ./ - rm -rf ./* || true - - - name: Checkout UR - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - with: - path: ur-repo - - - name: Checkout SYCL - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - with: - repository: intel/llvm - ref: refs/heads/sycl - path: sycl-repo - - - name: Set CUDA env vars - if: matrix.adapter.name == 'CUDA' - run: | - echo "CUDA_LIB_PATH=/usr/local/cuda/lib64/stubs" >> $GITHUB_ENV - echo "LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV - - - name: Configure SYCL - run: > - python3 sycl-repo/buildbot/configure.py - -t ${{matrix.build_type}} - -o ${{github.workspace}}/sycl_build - --cmake-gen "Ninja" - --ci-defaults ${{matrix.adapter.config}} - --cmake-opt="-DLLVM_INSTALL_UTILS=ON" - --cmake-opt="-DSYCL_PI_TESTS=OFF" - --cmake-opt="-DSYCL_UR_USE_FETCH_CONTENT=OFF" - --cmake-opt="-DSYCL_UR_SOURCE_DIR=${{github.workspace}}/ur-repo/" - --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache - --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache - - - name: Build SYCL - run: cmake --build ${{github.workspace}}/sycl_build -j - - - name: Set extra llvm-lit options - if: matrix.adapter.extra_lit_flags != '' - run: echo "LIT_OPTS=${{matrix.adapter.extra_lit_flags}}" >> $GITHUB_ENV - - - name: Run check-sycl - # Remove after fixing SYCL test :: abi/layout_handler.cpp - # This issue does not affect further execution of e2e with UR. - continue-on-error: true - run: cmake --build ${{github.workspace}}/sycl_build --target check-sycl - - - name: Set additional env. vars - run: | - echo "${{github.workspace}}/sycl_build/bin" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=${{github.workspace}}/sycl_build/lib:$LD_LIBRARY_PATH" >> $GITHUB_ENV - - # Running (newly built) sycl-ls sets up some extra variables - - name: Setup SYCL variables - run: | - which clang++ sycl-ls - SYCL_UR_TRACE=-1 sycl-ls - - - name: Build e2e tests - run: > - cmake - -GNinja - -B ${{github.workspace}}/build-e2e/ - -S ${{github.workspace}}/sycl-repo/sycl/test-e2e/ - -DSYCL_TEST_E2E_TARGETS="${{matrix.adapter.prefix}}${{matrix.adapter.str_name}}:${{matrix.adapter.unit}}" - -DCMAKE_CXX_COMPILER="$(which clang++)" - -DLLVM_LIT="${{github.workspace}}/sycl-repo/llvm/utils/lit/lit.py" - - - name: Set LIT_XFAIL - if: inputs.xfail != '' - run: echo "LIT_XFAIL=${{inputs.xfail}}" >> $GITHUB_ENV - - - name: Set LIT_FILTER_OUT - if: inputs.filter_out != '' - run: echo "LIT_FILTER_OUT=${{inputs.filter_out}}" >> $GITHUB_ENV - - - name: Set LIT_XFAIL_NOT - if: inputs.xfail_not != '' - run: echo "LIT_XFAIL_NOT=${{inputs.xfail_not}}" >> $GITHUB_ENV - - # TODO: remove once intel/llvm lit tests can properly recognize the GPU - - name: Configure hardware platform feature for L0 - if: matrix.adapter.name == 'L0' - run: | - sed -i '/import lit.llvm/i config.available_features.add("gpu-intel-pvc-1T")' build-e2e/lit.site.cfg.py - sed -i '/import lit.llvm/i config.available_features.add("gpu-intel-pvc")' build-e2e/lit.site.cfg.py - - - name: Run e2e tests - id: tests - run: ninja -C build-e2e check-sycl-e2e || echo "e2e tests have failed. Ignoring failure." - - # FIXME: Requires pull-request: write permissions but this is only granted - # on pull requests from forks if using pull_request_target workflow - # trigger but not the pull_request trigger.. 
- # - name: Add comment to PR - # uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 - # if: ${{ always() }} - # with: - # script: | - # const adapter = '${{ matrix.adapter.name }}'; - # const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; - # const test_status = '${{ steps.tests.outcome }}'; - # const job_status = '${{ job.status }}'; - # const body = `E2E ${adapter} build:\n${url}\nJob status: ${job_status}. Test status: ${test_status}`; - - # github.rest.issues.createComment({ - # issue_number: context.issue.number, - # owner: context.repo.owner, - # repo: context.repo.repo, - # body: body - # }) diff --git a/.github/workflows/e2e_cuda.yml b/.github/workflows/e2e_cuda.yml deleted file mode 100644 index c2f1d969b8..0000000000 --- a/.github/workflows/e2e_cuda.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: E2E Cuda - -on: - workflow_call: - -permissions: - contents: read - pull-requests: write - -jobs: - e2e-build-hw: - if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks will not have the HW - name: Start e2e job - # use core flow, run it with cuda specific parameters - uses: ./.github/workflows/e2e_core.yml - with: - name: "CUDA" - runner_tag: "CUDA_E2E" - str_name: "cuda" - prefix: "ext_oneapi_" - config: "--cuda" - unit: "gpu" - extra_lit_flags: "-sv --max-time=3600" - xfail: "Regression/device_num.cpp" diff --git a/.github/workflows/e2e_level_zero.yml b/.github/workflows/e2e_level_zero.yml deleted file mode 100644 index 1fd814f271..0000000000 --- a/.github/workflows/e2e_level_zero.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: E2E Level Zero - -on: - workflow_call: - -permissions: - contents: read - pull-requests: write - -jobs: - e2e-build-hw: - if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks will not have the HW - name: Start e2e job - # use core flow, run it with L0 specific parameters - uses: ./.github/workflows/e2e_core.yml - with: - name: "L0" - runner_tag: "L0_E2E" - str_name: "level_zero" - prefix: "ext_oneapi_" - config: "" - unit: "gpu" - # Failing tests - xfail: "InvokeSimd/Regression/call_vadd_1d_spill.cpp;InvokeSimd/Regression/ImplicitSubgroup/call_vadd_1d_spill.cpp;ESIMD/mask_expand_load.cpp;Matrix/joint_matrix_prefetch.cpp;ESIMD/mask_expand_load.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_prefetch.cpp;Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp;Matrix/SPVCooperativeMatrix/element_wise_ops.cpp;" - # Unexpectedly Passed Tests - xfail_not: "" - # Flaky tests - filter_out: "Basic/accessor/accessor.cpp|DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp|Graph/Explicit/interop-level-zero-launch-kernel.cpp|Graph/RecordReplay/interop-level-zero-launch-kernel.cpp|syclcompat/launch/launch_policy_lmem.cpp" - # These runners by default spawn upwards of 260 workers. 
- # We also add a time out just in case some test hangs - extra_lit_flags: "--param gpu-intel-pvc=True --param gpu-intel-pvc-1T=True -sv -j 100 --max-time=3600" diff --git a/.github/workflows/e2e_opencl.yml b/.github/workflows/e2e_opencl.yml deleted file mode 100644 index e4714b2434..0000000000 --- a/.github/workflows/e2e_opencl.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: E2E OpenCL - -on: - workflow_call: - -permissions: - contents: read - pull-requests: write - -jobs: - e2e-build-hw: - if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks will not have the HW - name: Start e2e job - # use core flow, run it with OpenCL specific parameters - uses: ./.github/workflows/e2e_core.yml - with: - name: "OPENCL" - runner_tag: "OPENCL" - str_name: "opencl" - prefix: "" - config: "" - unit: "cpu" - xfail: "AOT/double.cpp;AOT/half.cpp;AOT/reqd-sg-size.cpp;Basic/built-ins/marray_geometric.cpp;KernelCompiler/kernel_compiler_spirv.cpp;KernelCompiler/opencl_queries.cpp;NonUniformGroups/ballot_group.cpp;NonUniformGroups/ballot_group_algorithms.cpp;NonUniformGroups/fixed_size_group_algorithms.cpp;NonUniformGroups/opportunistic_group.cpp;NonUniformGroups/opportunistic_group_algorithms.cpp;NonUniformGroups/tangle_group.cpp;NonUniformGroups/tangle_group_algorithms.cpp" - extra_lit_flags: "-sv --max-time=3600" diff --git a/.github/workflows/examples-hw-level-zero.yml b/.github/workflows/examples-hw-level-zero.yml deleted file mode 100644 index cf28b8e258..0000000000 --- a/.github/workflows/examples-hw-level-zero.yml +++ /dev/null @@ -1,69 +0,0 @@ ---- -name: Examples - Adapters on Level Zero HW - -on: [push, pull_request] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - examples: - name: Examples on HW - # if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks won't have the HW - if: false # temporaily disabled due to conda env setup issues - strategy: - matrix: - adapter: [ - {name: L0} - ] - build_type: [Debug, Release] - compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: clang++}] - - runs-on: ${{matrix.adapter.name}} - - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Install pip packages - run: pip install -r third_party/requirements.txt - - - name: Init conda env - uses: conda-incubator/setup-miniconda@9f54435e0e72c53962ee863144e47a4b094bfd35 # v2.3.0 - with: - miniconda-version: "latest" - activate-environment: examples - environment-file: third_party/deps.yml - auto-activate-base: false - - - name: Configure CMake - shell: bash -el {0} - run: > - cmake - -B${{github.workspace}}/build - -DCMAKE_C_COMPILER=${{matrix.compiler.c}} - -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} - -DCMAKE_BUILD_TYPE=${{matrix.build_type}} - -DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON - -DUR_BUILD_EXAMPLE_CODEGEN=ON - -DUR_DEVELOPER_MODE=ON - - - name: Build - run: cmake --build ${{github.workspace}}/build -j $(nproc) - - - name: Test codegen example - working-directory: ${{github.workspace}}/build - run: bin/codegen - - # conda init adds content to user's profile making it failing (if conda is gone) - - name: Cleanup after conda init - run: | - cat ${HOME}/.profile || true - rm ${HOME}/.profile || true - - - name: Get information about platform - if: ${{ always() }} - run: .github/scripts/get_system_info.sh diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml deleted file mode 100644 index 
faf7060503..0000000000 --- a/.github/workflows/labeler.yml +++ /dev/null @@ -1,23 +0,0 @@ -# Automatically add labels to pull requests based on globs in the -# .github/labeler.yml config file. For documentation see: -# https://github.com/marketplace/actions/labeler ---- -name: Pull Request Labeler - -on: [ pull_request_target ] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - labeler: - permissions: - contents: read - pull-requests: write - runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }} - steps: - - uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0 diff --git a/.github/workflows/multi_device.yml b/.github/workflows/multi_device.yml deleted file mode 100644 index 48a804bdf8..0000000000 --- a/.github/workflows/multi_device.yml +++ /dev/null @@ -1,66 +0,0 @@ ---- -name: Multi Device testing - -on: [push, pull_request] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - examples: - name: Multi Device testing - if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks won't have the HW - strategy: - matrix: - adapter: [ - {name: L0}, - {name: L0_V2} - ] - build_type: [Debug, Release] - compiler: [{c: gcc, cxx: g++}] # TODO: investigate why memory-adapter-level_zero hangs with clang - - runs-on: "${{matrix.adapter.name}}_2T" - - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Install pip packages - run: pip install -r third_party/requirements.txt - - - name: Download DPC++ - run: | - wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-01-29/sycl_linux.tar.gz - mkdir dpcpp_compiler - tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C dpcpp_compiler - - - name: Configure CMake - shell: bash -el {0} - run: > - cmake - -B${{github.workspace}}/build - -DCMAKE_C_COMPILER=${{matrix.compiler.c}} - -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} - -DCMAKE_BUILD_TYPE=${{matrix.build_type}} - -DUR_DEVELOPER_MODE=ON - -DUR_BUILD_TESTS=ON - -DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON - -DUR_CONFORMANCE_TEST_LOADER=OFF - -DUR_TEST_DEVICES_COUNT=2 - -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++ - -DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib - - - name: Build - run: cmake --build ${{github.workspace}}/build -j $(nproc) - - - name: Test adapter specific - working-directory: ${{github.workspace}}/build - run: ctest -C ${{matrix.build_type}} --output-on-failure -L "adapter-specific" -E "test-adapter-level_zero_multi_queue" --timeout 180 - # TODO: investigate why test-adapter-level_zero_multi_queue fails on newer driver - - - name: Test adapters - working-directory: ${{github.workspace}}/build - run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" -E "exp_command_buffer" --timeout 180 diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml deleted file mode 100644 index 06d4026676..0000000000 --- a/.github/workflows/nightly.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: Nightly - -on: - workflow_dispatch: - schedule: - # Run every day at 23:00 UTC - - cron: '0 23 * * *' - -permissions: - contents: read - -jobs: - fuzztest: - name: Fuzz tests long - uses: 
./.github/workflows/build-fuzz-reusable.yml - with: - test_label: "fuzz-long" diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml deleted file mode 100644 index f466cc693e..0000000000 --- a/.github/workflows/prerelease.yml +++ /dev/null @@ -1,24 +0,0 @@ ---- -name: Deploy weekly prerelease - -on: - schedule: - # At 23:00 on Friday, GitHub actions schedule is in UTC time. - - cron: 0 23 * * 5 - -permissions: - contents: read - -jobs: - weekly-prerelease: - runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }} - permissions: - contents: write - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Create weekly prerelease - run: - gh release create --prerelease --title "Weekly Stable Snapshot $(date +%Y/%m/%d)" weekly-$(date +%Y-%m-%d) - env: - GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml deleted file mode 100644 index 693cfdd9e5..0000000000 --- a/.github/workflows/scorecard.yml +++ /dev/null @@ -1,60 +0,0 @@ -# Scorecard analysis, looking for vulnerabilities and bad practices in the repo. -name: Scorecard supply-chain security -on: - # For Branch-Protection check. Only the default branch is supported. See - # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection - branch_protection_rule: - workflow_dispatch: - schedule: - # Runs at 22:45 UTC on Thursday. - - cron: '45 22 * * 4' - push: - branches: [ "main" ] - -# Declare default permissions as read only. -permissions: read-all - -jobs: - analysis: - name: Scorecard analysis - runs-on: ubuntu-latest - permissions: - # Needed to upload the results to code-scanning dashboard. - security-events: write - # Needed to publish results and get a badge (see publish_results below). - id-token: write - - steps: - - name: "Checkout code" - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - with: - persist-credentials: false - - - name: "Run analysis" - uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1 - with: - results_file: scorecard_results.sarif - results_format: sarif - # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: - # - you want to enable the Branch-Protection check on a *public* repository, or - # - you are installing Scorecard on a *private* repository - # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. - # repo_token: ${{ secrets.SCORECARD_TOKEN }} - - # Publish results to OpenSSF REST API for easy access by consumers - # Allows the repository to include the Scorecard badge. - # See https://github.com/ossf/scorecard-action#publishing-results. - publish_results: true - - - name: "Upload artifact" - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # 4.3.1 - with: - name: Scorecard results - path: scorecard_results.sarif - retention-days: 5 - - # Upload the results to GitHub's code scanning dashboard. 
- - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@05963f47d870e2cb19a537396c1f668a348c7d8f # v3.24.8 - with: - sarif_file: scorecard_results.sarif diff --git a/.github/workflows/source-checks.yml b/.github/workflows/source-checks.yml deleted file mode 100644 index e73f403320..0000000000 --- a/.github/workflows/source-checks.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: Source Checks - -on: [push, pull_request] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - source-checks: - name: Source Checks - strategy: - matrix: - os: ['ubuntu-22.04', 'windows-2022'] - - runs-on: ${{matrix.os}} - - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 - with: - python-version: 3.9 - - - name: Install pip packages - run: pip install -r third_party/requirements.txt - - - name: "[Lin] Install doxygen" - if: matrix.os == 'ubuntu-22.04' - run: | - sudo apt-get update - sudo apt-get install -y doxygen - - - name: "[Win] Install doxygen" - if: matrix.os == 'windows-2022' - run: | - $WorkingDir = $PWD.Path - Invoke-WebRequest -Uri https://github.com/doxygen/doxygen/releases/download/Release_1_9_8/doxygen-1.9.8.windows.x64.bin.zip -OutFile "$WorkingDir\doxygen.zip" - Expand-Archive -Path "$WorkingDir\doxygen.zip" - Add-Content $env:GITHUB_PATH "$WorkingDir\doxygen" - - - name: "[Lin] Install hwloc" - if: matrix.os == 'ubuntu-22.04' - run: .github/scripts/install_hwloc.sh - - - name: "[Win] Install hwloc" - if: matrix.os == 'windows-2022' - run: vcpkg install hwloc:x64-windows - - - name: Configure CMake - env: - VCPKG_PATH: "C:/vcpkg/packages/hwloc_x64-windows" - run: > - cmake - -B${{github.workspace}}/build - -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" - -DUR_ENABLE_TRACING=OFF - -DCMAKE_BUILD_TYPE=Debug - -DUR_BUILD_TESTS=OFF - -DUR_FORMAT_CPP_STYLE=ON - - # Verifying license should be enough on a single OS - - name: Verify that each source file contains a license - if: matrix.os == 'ubuntu-22.04' - run: cmake --build ${{github.workspace}}/build --target verify-licenses - - - name: Generate source from spec, check for uncommitted diff - run: cmake --build ${{github.workspace}}/build --target check-generated diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml deleted file mode 100644 index c2ef1d47e7..0000000000 --- a/.github/workflows/trivy.yml +++ /dev/null @@ -1,50 +0,0 @@ -# Runs linter for Docker files -name: Trivy - -on: - workflow_dispatch: - push: - pull_request: - paths: - - '.github/docker/*Dockerfile' - - '.github/workflows/trivy.yml' - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - linux: - name: Trivy - runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }} - permissions: - security-events: write - - steps: - - name: Clone repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Run Trivy - uses: aquasecurity/trivy-action@84384bd6e777ef152729993b8145ea352e9dd3ef # v0.17.0 - with: - scan-type: 'config' - hide-progress: false - format: 'sarif' - output: 'trivy-results.sarif' - exit-code: 1 # Fail if issue found - # file with suppressions: .trivyignore (in root dir) - - - name: Print report and trivyignore file - run: | - echo "### Trivy ignore content:" 
- cat .trivyignore - echo "### Trivy report:" - cat trivy-results.sarif - - - name: Upload results - uses: github/codeql-action/upload-sarif@e8893c57a1f3a2b659b6b55564fdfdbbd2982911 # v3.24.0 - with: - sarif_file: 'trivy-results.sarif' diff --git a/scripts/benchmarks/benches/base.py b/scripts/benchmarks/benches/base.py index feeaa568b6..13c2a8ef92 100644 --- a/scripts/benchmarks/benches/base.py +++ b/scripts/benchmarks/benches/base.py @@ -35,24 +35,26 @@ def run_bench(self, command, env_vars, ld_library=[]): return run( command=command, env_vars=env_vars_with_forced_adapter, - add_sycl=True, + add_sycl=options.sycl is not None, cwd=options.benchmark_cwd, ld_library=ld_library ).stdout.decode() - def create_data_path(self, name): - data_path = os.path.join(self.directory, "data", name) - - if options.rebuild and Path(data_path).exists(): - shutil.rmtree(data_path) + def create_data_path(self, name, skip_data_dir = False): + if skip_data_dir: + data_path = os.path.join(self.directory, name) + else: + data_path = os.path.join(self.directory, 'data', name) + if options.rebuild and Path(data_path).exists(): + shutil.rmtree(data_path) Path(data_path).mkdir(parents=True, exist_ok=True) return data_path - def download(self, name, url, file, untar = False): - self.data_path = self.create_data_path(name) - return download(self.data_path, url, file, True) + def download(self, name, url, file, untar = False, unzip = False, skip_data_dir = False): + self.data_path = self.create_data_path(name, skip_data_dir) + return download(self.data_path, url, file, untar, unzip) def name(self): raise NotImplementedError() @@ -69,9 +71,6 @@ def run(self, env_vars) -> list[Result]: def teardown(self): raise NotImplementedError() - def ignore_iterations(self): - return False - class Suite: def benchmarks(self) -> list[Benchmark]: raise NotImplementedError() diff --git a/scripts/benchmarks/benches/compute.py b/scripts/benchmarks/benches/compute.py index f872399e9e..229a50e84d 100644 --- a/scripts/benchmarks/benches/compute.py +++ b/scripts/benchmarks/benches/compute.py @@ -50,6 +50,8 @@ def benchmarks(self) -> list[Benchmark]: return [] benches = [ + SubmitKernelL0(self, 0), + SubmitKernelL0(self, 1), SubmitKernelSYCL(self, 0), SubmitKernelSYCL(self, 1), QueueInOrderMemcpy(self, 0, 'Device', 'Device', 1024), @@ -59,14 +61,16 @@ def benchmarks(self) -> list[Benchmark]: ExecImmediateCopyQueue(self, 0, 1, 'Device', 'Device', 1024), ExecImmediateCopyQueue(self, 1, 1, 'Device', 'Host', 1024), VectorSum(self), - MemcpyExecute(self, 400, 1, 102400, 10, 1, 1), - MemcpyExecute(self, 100, 8, 102400, 10, 1, 1), - MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1), - MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1), - MemcpyExecute(self, 400, 1, 102400, 10, 0, 1), - MemcpyExecute(self, 100, 8, 102400, 10, 0, 1), - MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1), - MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1), + MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1), + MemcpyExecute(self, 100, 8, 102400, 10, 1, 1, 1), + MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1, 1), + MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1, 1), + MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1), + MemcpyExecute(self, 100, 8, 102400, 10, 0, 1, 1), + MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1, 1), + MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1, 1), + MemcpyExecute(self, 4096, 1, 1024, 10, 0, 1, 0), + MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0), ] if options.ur is not None: @@ -82,7 +86,7 @@ def parse_unit_type(compute_unit): return "instr" elif "[us]" in 
compute_unit: return "μs" - return "unknown" + return compute_unit.replace("[", "").replace("]", "") class ComputeBenchmark(Benchmark): def __init__(self, bench, name, test): @@ -100,6 +104,9 @@ def extra_env_vars(self) -> dict: def setup(self): self.benchmark_bin = os.path.join(self.bench.directory, 'compute-benchmarks-build', 'bin', self.bench_name) + def explicit_group(self): + return "" + def run(self, env_vars) -> list[Result]: command = [ f"{self.benchmark_bin}", @@ -114,9 +121,10 @@ def run(self, env_vars) -> list[Result]: result = self.run_bench(command, env_vars) parsed_results = self.parse_output(result) ret = [] - for label, mean, unit in parsed_results: - extra_label = " CPU count" if parse_unit_type(unit) == "CPU count" else "" - ret.append(Result(label=self.name() + extra_label, value=mean, command=command, env=env_vars, stdout=result, unit=parse_unit_type(unit))) + for label, median, stddev, unit in parsed_results: + extra_label = " CPU count" if parse_unit_type(unit) == "instr" else "" + explicit_group = self.explicit_group() + extra_label if self.explicit_group() != "" else "" + ret.append(Result(label=self.name() + extra_label, explicit_group=explicit_group, value=median, stddev=stddev, command=command, env=env_vars, stdout=result, unit=parse_unit_type(unit))) return ret def parse_output(self, output): @@ -131,8 +139,11 @@ def parse_output(self, output): try: label = data_row[0] mean = float(data_row[1]) + median = float(data_row[2]) + # compute benchmarks report stddev as % + stddev = mean * (float(data_row[3].strip('%')) / 100.0) unit = data_row[7] - results.append((label, mean, unit)) + results.append((label, median, stddev, unit)) except (ValueError, IndexError) as e: raise ValueError(f"Error parsing output: {e}") if len(results) == 0: @@ -151,6 +162,9 @@ def name(self): order = "in order" if self.ioq else "out of order" return f"api_overhead_benchmark_sycl SubmitKernel {order}" + def explicit_group(self): + return "SubmitKernel" + def bin_args(self) -> list[str]: return [ f"--Ioq={self.ioq}", @@ -171,6 +185,32 @@ def name(self): order = "in order" if self.ioq else "out of order" return f"api_overhead_benchmark_ur SubmitKernel {order}" + def explicit_group(self): + return "SubmitKernel" + + def bin_args(self) -> list[str]: + return [ + f"--Ioq={self.ioq}", + "--DiscardEvents=0", + "--MeasureCompletion=0", + "--iterations=100000", + "--Profiling=0", + "--NumKernels=10", + "--KernelExecTime=1" + ] + +class SubmitKernelL0(ComputeBenchmark): + def __init__(self, bench, ioq): + self.ioq = ioq + super().__init__(bench, "api_overhead_benchmark_l0", "SubmitKernel") + + def name(self): + order = "in order" if self.ioq else "out of order" + return f"api_overhead_benchmark_l0 SubmitKernel {order}" + + def explicit_group(self): + return "SubmitKernel" + def bin_args(self) -> list[str]: return [ f"--Ioq={self.ioq}", @@ -255,6 +295,10 @@ def __init__(self, bench, type, size, placement): def name(self): return f"memory_benchmark_sycl StreamMemory, placement {self.placement}, type {self.type}, size {self.size}" + # measurement is in GB/s + def lower_is_better(self): + return False + def bin_args(self) -> list[str]: return [ "--iterations=10000", @@ -282,22 +326,23 @@ def bin_args(self) -> list[str]: ] class MemcpyExecute(ComputeBenchmark): - def __init__(self, bench, numOpsPerThread, numThreads, allocSize, iterations, srcUSM, dstUSM): + def __init__(self, bench, numOpsPerThread, numThreads, allocSize, iterations, srcUSM, dstUSM, useEvent): self.numOpsPerThread = numOpsPerThread 
self.numThreads = numThreads self.allocSize = allocSize self.iterations = iterations self.srcUSM = srcUSM self.dstUSM = dstUSM + self.useEvents = useEvent super().__init__(bench, "multithread_benchmark_ur", "MemcpyExecute") def name(self): - return f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}" + return f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}" + (" without events" if not self.useEvents else "") def bin_args(self) -> list[str]: return [ "--Ioq=1", - "--UseEvents=1", + f"--UseEvents={self.useEvents}", "--MeasureCompletion=1", "--UseQueuePerThread=1", f"--AllocSize={self.allocSize}", diff --git a/scripts/benchmarks/benches/llamacpp.py b/scripts/benchmarks/benches/llamacpp.py index 50dd8d04c6..2dbdb5cbcf 100644 --- a/scripts/benchmarks/benches/llamacpp.py +++ b/scripts/benchmarks/benches/llamacpp.py @@ -6,85 +6,14 @@ import csv import io from pathlib import Path -import re -import shutil from utils.utils import download, git_clone from .base import Benchmark, Suite from .result import Result from utils.utils import run, create_build_path from .options import options +from .oneapi import get_oneapi import os -class OneAPI: - # random unique number for benchmark oneAPI installation - ONEAPI_BENCHMARK_INSTANCE_ID = 98765 - def __init__(self, directory): - self.oneapi_dir = os.path.join(directory, 'oneapi') - Path(self.oneapi_dir).mkdir(parents=True, exist_ok=True) - # delete if some option is set? - - # can we just hardcode these links? - self.install_package('dnnl', 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/87e117ab-039b-437d-9c80-dcd5c9e675d5/intel-onednn-2025.0.0.862_offline.sh') - self.install_package('mkl', 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh') - return - - def install_package(self, name, url): - package_path = os.path.join(self.oneapi_dir, name) - if Path(package_path).exists(): - print(f"{package_path} exists, skipping installing oneAPI package {name}...") - return - - package = download(self.oneapi_dir, url, f'package_{name}.sh') - try: - print(f"installing f{name}") - run(f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance f{self.ONEAPI_BENCHMARK_INSTANCE_ID}") - except: - print("oneAPI installation likely exists already") - return - print(f"f{name} installation complete") - - def package_dir(self, package, dir): - return os.path.join(self.oneapi_dir, package, 'latest', dir) - - def package_cmake(self, package): - package_lib = self.package_dir(package, 'lib') - return os.path.join(package_lib, 'cmake', package) - - def mkl_lib(self): - return self.package_dir('mkl', 'lib') - - def mkl_include(self): - return self.package_dir('mkl', 'include') - - def mkl_cmake(self): - return self.package_cmake('mkl') - - def dnn_lib(self): - return self.package_dir('dnnl', 'lib') - - def dnn_include(self): - return self.package_dir('dnnl', 'include') - - def dnn_cmake(self): - return self.package_cmake('dnnl') - - def tbb_lib(self): - return self.package_dir('tbb', 'lib') - - def tbb_cmake(self): - return self.package_cmake('tbb') - - def compiler_lib(self): - return self.package_dir('compiler', 'lib') - - def ld_libraries(self): - return [ - self.compiler_lib(), - self.mkl_lib(), - self.tbb_lib(), 
- self.dnn_lib() - ] - class LlamaCppBench(Suite): def __init__(self, directory): if options.sycl is None: @@ -103,7 +32,7 @@ def setup(self): self.model = download(self.models_dir, "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf", "Phi-3-mini-4k-instruct-q4.gguf") - self.oneapi = OneAPI(self.directory) + self.oneapi = get_oneapi() self.build_path = create_build_path(self.directory, 'llamacpp-build') @@ -147,9 +76,6 @@ def name(self): def lower_is_better(self): return False - def ignore_iterations(self): - return True - def run(self, env_vars) -> list[Result]: command = [ f"{self.benchmark_bin}", diff --git a/scripts/benchmarks/benches/oneapi.py b/scripts/benchmarks/benches/oneapi.py new file mode 100644 index 0000000000..414c4aa64a --- /dev/null +++ b/scripts/benchmarks/benches/oneapi.py @@ -0,0 +1,86 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from pathlib import Path +from utils.utils import download, run +from .options import options +import os + +class OneAPI: + # random unique number for benchmark oneAPI installation + ONEAPI_BENCHMARK_INSTANCE_ID = 98765 + def __init__(self): + self.oneapi_dir = os.path.join(options.workdir, 'oneapi') + Path(self.oneapi_dir).mkdir(parents=True, exist_ok=True) + # delete if some option is set? + + # can we just hardcode these links? + self.install_package('dnnl', 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/87e117ab-039b-437d-9c80-dcd5c9e675d5/intel-onednn-2025.0.0.862_offline.sh') + self.install_package('mkl', 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh') + return + + def install_package(self, name, url): + package_path = os.path.join(self.oneapi_dir, name) + if Path(package_path).exists(): + print(f"{package_path} exists, skipping installing oneAPI package {name}...") + return + + package = download(self.oneapi_dir, url, f'package_{name}.sh') + try: + print(f"installing f{name}") + run(f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance f{self.ONEAPI_BENCHMARK_INSTANCE_ID}") + except: + print("oneAPI installation likely exists already") + return + print(f"f{name} installation complete") + + def package_dir(self, package, dir): + return os.path.join(self.oneapi_dir, package, 'latest', dir) + + def package_cmake(self, package): + package_lib = self.package_dir(package, 'lib') + return os.path.join(package_lib, 'cmake', package) + + def mkl_lib(self): + return self.package_dir('mkl', 'lib') + + def mkl_include(self): + return self.package_dir('mkl', 'include') + + def mkl_cmake(self): + return self.package_cmake('mkl') + + def dnn_lib(self): + return self.package_dir('dnnl', 'lib') + + def dnn_include(self): + return self.package_dir('dnnl', 'include') + + def dnn_cmake(self): + return self.package_cmake('dnnl') + + def tbb_lib(self): + return self.package_dir('tbb', 'lib') + + def tbb_cmake(self): + return self.package_cmake('tbb') + + def compiler_lib(self): + return self.package_dir('compiler', 'lib') + + def ld_libraries(self): + return [ + self.compiler_lib(), + self.mkl_lib(), + self.tbb_lib(), + self.dnn_lib() + ] + +oneapi_instance = None + +def get_oneapi() -> OneAPI: # oneAPI singleton + if not hasattr(get_oneapi, "instance"): + get_oneapi.instance = OneAPI() + return 
get_oneapi.instance diff --git a/scripts/benchmarks/benches/options.py b/scripts/benchmarks/benches/options.py index 5997cdedb8..f793c1fa36 100644 --- a/scripts/benchmarks/benches/options.py +++ b/scripts/benchmarks/benches/options.py @@ -8,19 +8,24 @@ class Compare(Enum): @dataclass class Options: + workdir: str = None sycl: str = None ur: str = None + umf: str = None ur_adapter: str = None rebuild: bool = True benchmark_cwd: str = "INVALID" timeout: float = 600 - iterations: int = 5 + iterations: int = 3 verbose: bool = False compare: Compare = Compare.LATEST compare_max: int = 10 # average/median over how many results output_html: bool = False output_markdown: bool = True dry_run: bool = False + # these two should probably be merged into one setting + stddev_threshold: float = 0.02 + epsilon: float = 0.02 options = Options() diff --git a/scripts/benchmarks/benches/result.py b/scripts/benchmarks/benches/result.py index 7d40040607..c975fa792d 100644 --- a/scripts/benchmarks/benches/result.py +++ b/scripts/benchmarks/benches/result.py @@ -18,7 +18,11 @@ class Result: stdout: str passed: bool = True unit: str = "" - # values should not be set by the benchmark + explicit_group: str = "" + # stddev can be optionally set by the benchmark, + # if not set, it will be calculated automatically. + stddev: float = 0.0 + # values below should not be set by the benchmark name: str = "" lower_is_better: bool = True git_hash: str = '' diff --git a/scripts/benchmarks/benches/test.py b/scripts/benchmarks/benches/test.py index 802688f032..efe789f678 100644 --- a/scripts/benchmarks/benches/test.py +++ b/scripts/benchmarks/benches/test.py @@ -20,30 +20,31 @@ def setup(self): def benchmarks(self) -> list[Benchmark]: bench_configs = [ - ("Memory Bandwidth", 2000, 200), - ("Latency", 100, 20), - ("Throughput", 1500, 150), - ("FLOPS", 3000, 300), - ("Cache Miss Rate", 250, 25), + ("Memory Bandwidth", 2000, 200, "Foo Group"), + ("Latency", 100, 20, "Bar Group"), + ("Throughput", 1500, 150, "Foo Group"), + ("FLOPS", 3000, 300, "Foo Group"), + ("Cache Miss Rate", 250, 25, "Bar Group"), ] result = [] - for base_name, base_value, base_diff in bench_configs: + for base_name, base_value, base_diff, group in bench_configs: for variant in range(6): value_multiplier = 1.0 + (variant * 0.2) name = f"{base_name} {variant+1}" value = base_value * value_multiplier diff = base_diff * value_multiplier - result.append(TestBench(name, value, diff)) + result.append(TestBench(name, value, diff, group)) return result class TestBench(Benchmark): - def __init__(self, name, value, diff): + def __init__(self, name, value, diff, group = ''): self.bname = name self.value = value self.diff = diff + self.group = group super().__init__("") def name(self): @@ -58,7 +59,7 @@ def setup(self): def run(self, env_vars) -> list[Result]: random_value = self.value + random.uniform(-1 * (self.diff), self.diff) return [ - Result(label=self.name(), value=random_value, command="", env={"A": "B"}, stdout="no output", unit="ms") + Result(label=self.name(), explicit_group=self.group, value=random_value, command="", env={"A": "B"}, stdout="no output", unit="ms") ] def teardown(self): diff --git a/scripts/benchmarks/benches/umf.py b/scripts/benchmarks/benches/umf.py new file mode 100644 index 0000000000..7725943271 --- /dev/null +++ b/scripts/benchmarks/benches/umf.py @@ -0,0 +1,172 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import random +from utils.utils import git_clone +from .base import Benchmark, Suite +from .result import Result +from utils.utils import run, create_build_path +from .options import options +import os +import csv +import io + +def isUMFAvailable(): + return options.umf is not None + +class UMFSuite(Suite): + def __init__(self, directory): + self.directory = directory + if not isUMFAvailable(): + print("UMF not provided. Related benchmarks will not run") + + def setup(self): + if not isUMFAvailable(): + return [] + self.built = True + + def benchmarks(self) -> list[Benchmark]: + if not isUMFAvailable(): + return + + benches = [ + GBench(self), + ] + + return benches + +class ComputeUMFBenchmark(Benchmark): + def __init__(self, bench, name): + self.bench = bench + self.bench_name = name + + self.col_name = None + self.col_iterations = None + self.col_real_time = None + self.col_cpu_time = None + self.col_time_unit = None + + self.col_statistics_time = None + + super().__init__(bench.directory) + + def bin_args(self) -> list[str]: + return [] + + def extra_env_vars(self) -> dict: + return {} + + def setup(self): + if not isUMFAvailable(): + print("UMF prefix path not provided") + return + + self.benchmark_bin = os.path.join(options.umf, 'benchmark', self.bench_name) + + def run(self, env_vars) -> list[Result]: + command = [ + f"{self.benchmark_bin}", + ] + + command += self.bin_args() + env_vars.update(self.extra_env_vars()) + + result = self.run_bench(command, env_vars) + parsed = self.parse_output(result) + results = [] + for r in parsed: + (config, pool, mean) = r + label = f"{config} {pool}" + results.append(Result(label=label, value=mean, command=command, env=env_vars, stdout=result, unit="ns", explicit_group=config)) + return results + + # if different time units - convert TODO safety check for time units + def parse_output(self, output): + csv_file = io.StringIO(output) + reader = csv.reader(csv_file) + next(reader, None) + data_row = next(reader, None) + if data_row is None: + raise ValueError("Benchmark output does not contain data.") + try: + label = data_row[0] + mean = float(data_row[1]) + return (label, mean) + except (ValueError, IndexError) as e: + raise ValueError(f"Error parsing output: {e}") + + + + # Implementation with self.col_* indices could lead to the division by None + def get_mean(self, datarow): + raise NotImplementedError() + + def teardown(self): + return + +class GBench(ComputeUMFBenchmark): + def __init__(self, bench): + super().__init__(bench, "umf-benchmark") + + self.col_name = 0 + self.col_iterations = 1 + self.col_real_time = 2 + self.col_cpu_time = 3 + self.col_time_unit = 4 + + self.idx_pool = 0 + self.idx_config = 1 + self.name_separator = '/' + + self.col_statistics_time = self.col_real_time + + def name(self): + return self.bench_name + + # --benchmark_format describes stdout output + # --benchmark_out= and --benchmark_out_format= + # describe output to a file + def bin_args(self): + return ["--benchmark_format=csv"] + + # the default unit + # might be changed globally with --benchmark_time_unit={ns|us|ms|s} + # the change affects only benchmark where time unit has not been set + # explicitly + def unit(self): + return "ns" + + def get_pool_and_config(self, full_name): + list_split = full_name.split(self.name_separator, 1) + if len(list_split) != 2: + raise ValueError("Incorrect benchmark name format: ", full_name) + + return list_split[self.idx_pool], 
list_split[self.idx_config] + + def get_mean(self, datarow): + return float(datarow[self.col_statistics_time]) + + def parse_output(self, output): + csv_file = io.StringIO(output) + reader = csv.reader(csv_file) + + data_row = next(reader, None) + if data_row is None: + raise ValueError("Benchmark output does not contain data.") + + results = [] + for row in reader: + try: + full_name = row[self.col_name] + pool, config = self.get_pool_and_config(full_name) + mean = self.get_mean(row) + results.append((config, pool, mean)) + except KeyError as e: + raise ValueError(f"Error parsing output: {e}") + + return results + + + \ No newline at end of file diff --git a/scripts/benchmarks/benches/velocity.py b/scripts/benchmarks/benches/velocity.py index 605cf03fd4..705421d963 100644 --- a/scripts/benchmarks/benches/velocity.py +++ b/scripts/benchmarks/benches/velocity.py @@ -10,6 +10,9 @@ from .result import Result from utils.utils import run, create_build_path from .options import options +from .oneapi import get_oneapi +import shutil + import os class VelocityBench(Suite): @@ -35,7 +38,10 @@ def benchmarks(self) -> list[Benchmark]: CudaSift(self), Easywave(self), QuickSilver(self), - SobelFilter(self) + SobelFilter(self), + DLCifar(self), + DLMnist(self), + SVM(self) ] class VelocityBase(Benchmark): @@ -50,6 +56,12 @@ def __init__(self, name: str, bin_name: str, vb: VelocityBench, unit: str): def download_deps(self): return + def extra_cmake_args(self) -> list[str]: + return [] + + def ld_libraries(self) -> list[str]: + return [] + def setup(self): self.download_deps() self.benchmark_bin = os.path.join(self.directory, self.bench_name, self.bin_name) @@ -62,8 +74,10 @@ def setup(self): f"-S {self.code_path}", f"-DCMAKE_BUILD_TYPE=Release" ] + configure_command += self.extra_cmake_args() + run(configure_command, {'CC': 'clang', 'CXX':'clang++'}, add_sycl=True) - run(f"cmake --build {build_path} -j", add_sycl=True) + run(f"cmake --build {build_path} -j", add_sycl=True, ld_library=self.ld_libraries()) def bin_args(self) -> list[str]: return [] @@ -82,7 +96,7 @@ def run(self, env_vars) -> list[Result]: ] command += self.bin_args() - result = self.run_bench(command, env_vars) + result = self.run_bench(command, env_vars, ld_library=self.ld_libraries()) return [ Result(label=self.name(), value=self.parse_output(result), command=command, env=env_vars, stdout=result, unit=self.unit) ] @@ -136,7 +150,6 @@ def __init__(self, vb: VelocityBench): def download_deps(self): self.download("sobel_filter", "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=", "sobel_filter_data.tgz", untar=True) - return def name(self): return "Velocity-Bench Sobel Filter" @@ -228,7 +241,6 @@ def get_last_elapsed_time(self, log_file_path) -> float: def parse_output(self, stdout: str) -> float: return self.get_last_elapsed_time(os.path.join(options.benchmark_cwd, "easywave.log")) - class CudaSift(VelocityBase): def __init__(self, vb: VelocityBench): super().__init__("cudaSift", "cudaSift", vb, "ms") @@ -248,3 +260,103 @@ def parse_output(self, stdout: str) -> float: return float(match.group(1)) else: raise ValueError("Failed to parse benchmark output.") + +class DLCifar(VelocityBase): + def __init__(self, vb: VelocityBench): + self.oneapi = get_oneapi() + super().__init__("dl-cifar", "dl-cifar_sycl", vb, "s") + + def ld_libraries(self): + return self.oneapi.ld_libraries() + + def download_deps(self): + # TODO: dl-cifar hardcodes the path to this dataset as 
"../../datasets/cifar-10-binary"... + self.download("datasets", "https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz", "cifar-10-binary.tar.gz", untar=True, skip_data_dir=True) + return + + def extra_cmake_args(self): + return [ + f"-DCMAKE_CXX_FLAGS=-O3 -fsycl -ffast-math -I{self.oneapi.dnn_include()} -I{self.oneapi.mkl_include()} -L{self.oneapi.dnn_lib()} -L{self.oneapi.mkl_lib()}" + ] + + def name(self): + return "Velocity-Bench dl-cifar" + + def parse_output(self, stdout: str) -> float: + match = re.search(r'dl-cifar - total time for whole calculation: (\d+\.\d+) s', stdout) + if match: + return float(match.group(1)) + else: + raise ValueError("Failed to parse benchmark output.") + +class DLMnist(VelocityBase): + def __init__(self, vb: VelocityBench): + self.oneapi = get_oneapi() + super().__init__("dl-mnist", "dl-mnist-sycl", vb, "s") + + def ld_libraries(self): + return self.oneapi.ld_libraries() + + def download_deps(self): + # TODO: dl-mnist hardcodes the path to this dataset as "../../datasets/"... + self.download("datasets", "https://raw.githubusercontent.com/fgnt/mnist/master/train-images-idx3-ubyte.gz", "train-images.idx3-ubyte.gz", unzip=True, skip_data_dir=True) + self.download("datasets", "https://raw.githubusercontent.com/fgnt/mnist/master/train-labels-idx1-ubyte.gz", "train-labels.idx1-ubyte.gz", unzip=True, skip_data_dir=True) + self.download("datasets", "https://raw.githubusercontent.com/fgnt/mnist/master/t10k-images-idx3-ubyte.gz", "t10k-images.idx3-ubyte.gz", unzip=True, skip_data_dir=True) + self.download("datasets", "https://raw.githubusercontent.com/fgnt/mnist/master/t10k-labels-idx1-ubyte.gz", "t10k-labels.idx1-ubyte.gz", unzip=True, skip_data_dir=True) + + def extra_cmake_args(self): + return [ + f"-DCMAKE_CXX_FLAGS=-O3 -fsycl -ffast-math -I{self.oneapi.dnn_include()} -I{self.oneapi.mkl_include()} -L{self.oneapi.dnn_lib()} -L{self.oneapi.mkl_lib()}" + ] + + def name(self): + return "Velocity-Bench dl-mnist" + + def bin_args(self): + return [ + "-conv_algo", "ONEDNN_AUTO" + ] + + # TODO: This shouldn't be required. + # The application crashes with a segfault without it. 
+ def extra_env_vars(self): + return { + "NEOReadDebugKeys":"1", + "DisableScratchPages":"0", + } + + def parse_output(self, stdout: str) -> float: + match = re.search(r'dl-mnist - total time for whole calculation: (\d+\.\d+) s', stdout) + if match: + return float(match.group(1)) + else: + raise ValueError("Failed to parse benchmark output.") + +class SVM(VelocityBase): + def __init__(self, vb: VelocityBench): + self.oneapi = get_oneapi() + super().__init__("svm", "svm_sycl", vb, "s") + + def ld_libraries(self): + return self.oneapi.ld_libraries() + + def extra_cmake_args(self): + return [ + f"-DCMAKE_CXX_FLAGS=-O3 -fsycl -ffast-math -I{self.oneapi.dnn_include()} -I{self.oneapi.mkl_include()} -L{self.oneapi.dnn_lib()} -L{self.oneapi.mkl_lib()}" + ] + + def name(self): + return "Velocity-Bench svm" + + def bin_args(self): + return [ + f"{self.code_path}/a9a", + f"{self.code_path}/a.m", + ] + + def parse_output(self, stdout: str) -> float: + match = re.search(r'Total elapsed time : (\d+\.\d+) s', stdout) + if match: + return float(match.group(1)) + else: + raise ValueError("Failed to parse benchmark output.") diff --git a/scripts/benchmarks/main.py b/scripts/benchmarks/main.py index c83825c9e5..e692c80972 100755 --- a/scripts/benchmarks/main.py +++ b/scripts/benchmarks/main.py @@ -14,22 +14,114 @@ from output_markdown import generate_markdown from output_html import generate_html from history import BenchmarkHistory +from benches.umf import * from utils.utils import prepare_workdir; import argparse import re +import statistics # Update this if you are changing the layout of the results files INTERNAL_WORKDIR_VERSION = '2.0' +def run_iterations(benchmark: Benchmark, env_vars, iters: int, results: dict[str, list[Result]]): + for iter in range(iters): + print(f"running {benchmark.name()}, iteration {iter}... ", end='', flush=True) + bench_results = benchmark.run(env_vars) + if bench_results is None: + print(f"did not finish (OK for sycl-bench).") + break + + for bench_result in bench_results: + # TODO: report failures in markdown/html ? 
+ if not bench_result.passed: + print(f"complete ({bench_result.label}: verification FAILED)") + continue + + print(f"complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit}).") + + bench_result.name = bench_result.label + bench_result.lower_is_better = benchmark.lower_is_better() + + if bench_result.label not in results: + results[bench_result.label] = [] + + results[bench_result.label].append(bench_result) + +# https://www.statology.org/modified-z-score/ +def modified_z_score(values: list[float]) -> list[float]: + median = statistics.median(values) + mad = statistics.median([abs(v - median) for v in values]) + if mad == 0: + return [0] * len(values) + return [(0.6745 * (v - median)) / mad for v in values] + +def remove_outliers(results: dict[str, list[Result]], threshold: float = 3.5) -> dict[str, list[Result]]: + new_results = {} + for key, rlist in results.items(): + # don't eliminate outliers on first pass + if len(rlist) <= options.iterations: + new_results[key] = rlist + continue + + values = [r.value for r in rlist] + z_scores = modified_z_score(values) + filtered_rlist = [r for r, z in zip(rlist, z_scores) if abs(z) <= threshold] + + if not filtered_rlist: + new_results[key] = rlist + else: + new_results[key] = filtered_rlist + + return new_results + +def process_results(results: dict[str, list[Result]]) -> tuple[bool, list[Result]]: + processed: list[Result] = [] + # technically, we can detect whether result is below or above threshold per + # individual result. However, we can't repeat benchmark runs with that + # granularity. So we just reject all results and try again. + valid_results = True # above stddev threshold + + for label, rlist in remove_outliers(results).items(): + if (len(rlist) == 0): + continue + + if len(rlist) == 1: + processed.append(rlist[0]) + continue + + values = [r.value for r in rlist] + + mean_value = statistics.mean(values) + stddev = statistics.stdev(values) + + threshold = options.stddev_threshold * mean_value + + if stddev > threshold: + print(f"stddev {stddev} above the threshold {threshold} for {label}") + valid_results = False + + rlist.sort(key=lambda res: res.value) + median_index = len(rlist) // 2 + median_result = rlist[median_index] + + # only override the stddev if not already set + if median_result.stddev == 0.0: + median_result.stddev = stddev + + processed.append(median_result) + + return valid_results, processed + def main(directory, additional_env_vars, save_name, compare_names, filter): prepare_workdir(directory, INTERNAL_WORKDIR_VERSION) suites = [ - ComputeBench(directory), - VelocityBench(directory), - SyclBench(directory), - LlamaCppBench(directory), + UMFSuite(directory), + # ComputeBench(directory), + # VelocityBench(directory), + # SyclBench(directory), + # LlamaCppBench(directory), #TestSuite() ] if not options.dry_run else [] @@ -65,36 +157,14 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): for benchmark in benchmarks: try: merged_env_vars = {**additional_env_vars} - iteration_results = [] - iterations = options.iterations if not benchmark.ignore_iterations() else 1 - for iter in range(iterations): - print(f"running {benchmark.name()}, iteration {iter}... 
", end='', flush=True) - bench_results = benchmark.run(merged_env_vars) - if bench_results is not None: - for bench_result in bench_results: - if bench_result.passed: - print(f"complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit}).") - else: - print(f"complete ({bench_result.label}: verification FAILED)") - iteration_results.append(bench_result) - else: - print(f"did not finish (OK for sycl-bench).") + intermediate_results: dict[str, list[Result]] = {} + processed: list[Result] = [] + for _ in range(5): + run_iterations(benchmark, merged_env_vars, options.iterations, intermediate_results) + valid, processed = process_results(intermediate_results) + if valid: break - - if len(iteration_results) == 0: - continue - - for label in set([result.label for result in iteration_results]): - label_results = [result for result in iteration_results if result.label == label and result.passed == True] - if len(label_results) > 0: - label_results.sort(key=lambda res: res.value) - median_index = len(label_results) // 2 - median_result = label_results[median_index] - - median_result.name = label - median_result.lower_is_better = benchmark.lower_is_better() - - results.append(median_result) + results += processed except Exception as e: if options.exit_on_failure: raise e @@ -115,6 +185,9 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): # should this be configurable? history.load(1000) + # remove duplicates. this can happen if e.g., --compare baseline is specified manually. + compare_names = list(dict.fromkeys(compare_names)) + for name in compare_names: compare_result = history.get_compare(name) if compare_result: @@ -135,7 +208,8 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): # Otherwise we might be comparing the results to themselves. 
if not options.dry_run: history.save(saved_name, results, save_name is not None) - compare_names.append(saved_name) + if saved_name not in compare_names: + compare_names.append(saved_name) if options.output_html: html_content = generate_html(history.runs, 'oneapi-src/unified-runtime', compare_names) @@ -159,19 +233,21 @@ def validate_and_parse_env_args(env_args): parser.add_argument('benchmark_directory', type=str, help='Working directory to setup benchmarks.') parser.add_argument('--sycl', type=str, help='Root directory of the SYCL compiler.', default=None) parser.add_argument('--ur', type=str, help='UR install prefix path', default=None) + parser.add_argument('--umf', type=str, help='UMF install prefix path', default=None) parser.add_argument('--adapter', type=str, help='Options to build the Unified Runtime as part of the benchmark', default="level_zero") parser.add_argument("--no-rebuild", help='Rebuild the benchmarks from scratch.', action="store_true") parser.add_argument("--env", type=str, help='Use env variable for a benchmark run.', action="append", default=[]) parser.add_argument("--save", type=str, help='Save the results for comparison under a specified name.') parser.add_argument("--compare", type=str, help='Compare results against previously saved data.', action="append", default=["baseline"]) - parser.add_argument("--iterations", type=int, help='Number of times to run each benchmark to select a median value.', default=5) - parser.add_argument("--timeout", type=int, help='Timeout for individual benchmarks in seconds.', default=600) + parser.add_argument("--iterations", type=int, help='Number of times to run each benchmark to select a median value.', default=options.iterations) + parser.add_argument("--stddev-threshold", type=float, help='If stddev % is above this threshold, rerun all iterations', default=options.stddev_threshold) + parser.add_argument("--timeout", type=int, help='Timeout for individual benchmarks in seconds.', default=options.timeout) parser.add_argument("--filter", type=str, help='Regex pattern to filter benchmarks by name.', default=None) - parser.add_argument("--epsilon", type=float, help='Threshold to consider change of performance significant', default=0.005) + parser.add_argument("--epsilon", type=float, help='Threshold to consider change of performance significant', default=options.epsilon) parser.add_argument("--verbose", help='Print output of all the commands.', action="store_true") parser.add_argument("--exit-on-failure", help='Exit on first failure.', action="store_true") parser.add_argument("--compare-type", type=str, choices=[e.value for e in Compare], help='Compare results against previously saved data.', default=Compare.LATEST.value) - parser.add_argument("--compare-max", type=int, help='How many results to read for comparisions', default=10) + parser.add_argument("--compare-max", type=int, help='How many results to read for comparisions', default=options.compare_max) parser.add_argument("--output-html", help='Create HTML output', action="store_true", default=False) parser.add_argument("--output-markdown", help='Create Markdown output', action="store_true", default=True) parser.add_argument("--dry-run", help='Do not run any actual benchmarks', action="store_true", default=False) @@ -179,6 +255,7 @@ def validate_and_parse_env_args(env_args): args = parser.parse_args() additional_env_vars = validate_and_parse_env_args(args.env) + options.workdir = args.benchmark_directory options.verbose = args.verbose options.rebuild = not args.no_rebuild 
options.sycl = args.sycl @@ -186,6 +263,7 @@ def validate_and_parse_env_args(env_args): options.timeout = args.timeout options.epsilon = args.epsilon options.ur = args.ur + options.umf = args.umf options.ur_adapter = args.adapter options.exit_on_failure = args.exit_on_failure options.compare = Compare(args.compare_type) diff --git a/scripts/benchmarks/output_html.py b/scripts/benchmarks/output_html.py index 4a04252797..bc9b4ffe64 100644 --- a/scripts/benchmarks/output_html.py +++ b/scripts/benchmarks/output_html.py @@ -9,8 +9,8 @@ from collections import defaultdict from dataclasses import dataclass import matplotlib.dates as mdates -import numpy as np from benches.result import BenchmarkRun, Result +import numpy as np @dataclass class BenchmarkMetadata: @@ -24,232 +24,44 @@ class BenchmarkSeries: runs: list[BenchmarkRun] @dataclass -class LatestResults: - benchmark_label: str - run_values: dict[str, float] - - @classmethod - def from_dict(cls, label: str, values: dict[str, float]) -> 'LatestResults': - return cls(benchmark_label=label, run_values=values) - -def get_latest_results(benchmarks: list[BenchmarkSeries]) -> dict[str, LatestResults]: - latest_results: dict[str, LatestResults] = {} - for benchmark in benchmarks: - run_values = { - run.name: max(run.results, key=lambda x: x.date).value - for run in benchmark.runs - } - latest_results[benchmark.label] = LatestResults.from_dict(benchmark.label, run_values) - return latest_results - -def prepare_normalized_data(latest_results: dict[str, LatestResults], - benchmarks: list[BenchmarkSeries], - group_benchmarks: list[str], - non_baseline_runs: list[str], - baseline_name: str) -> list[list[float]]: - normalized_data = [] - benchmark_map = {b.label: b for b in benchmarks} - - for run_name in non_baseline_runs: - run_data: list[float] = [] - for benchmark_label in group_benchmarks: - benchmark_data = latest_results[benchmark_label].run_values - if run_name not in benchmark_data or baseline_name not in benchmark_data: - run_data.append(None) - continue - - baseline_value = benchmark_data[baseline_name] - current_value = benchmark_data[run_name] - - normalized_value = ((baseline_value / current_value) if benchmark_map[benchmark_label].metadata.lower_is_better - else (current_value / baseline_value)) * 100 - run_data.append(normalized_value) - normalized_data.append(run_data) - return normalized_data - -def format_benchmark_label(label: str) -> list[str]: - words = re.split(' |_', label) - lines = [] - current_line = [] - - # max line length 30 - for word in words: - if len(' '.join(current_line + [word])) > 30: - lines.append(' '.join(current_line)) - current_line = [word] - else: - current_line.append(word) - - if current_line: - lines.append(' '.join(current_line)) - - return lines - -def create_bar_plot(ax: plt.Axes, - normalized_data: list[list[float]], - group_benchmarks: list[str], - non_baseline_runs: list[str], - latest_results: dict[str, LatestResults], - benchmarks: list[BenchmarkSeries], - baseline_name: str) -> float: - x = np.arange(len(group_benchmarks)) - width = 0.8 / len(non_baseline_runs) - max_height = 0 - benchmark_map = {b.label: b for b in benchmarks} - - for i, (run_name, run_data) in enumerate(zip(non_baseline_runs, normalized_data)): - offset = width * i - width * (len(non_baseline_runs) - 1) / 2 - positions = x + offset - valid_data = [v if v is not None else 0 for v in run_data] - rects = ax.bar(positions, valid_data, width, label=run_name) - - for rect, value, benchmark_label in zip(rects, run_data, 
group_benchmarks): - if value is not None: - height = rect.get_height() - if height > max_height: - max_height = height - - ax.text(rect.get_x() + rect.get_width()/2., height + 2, - f'{value:.1f}%', - ha='center', va='bottom') - - benchmark_data = latest_results[benchmark_label].run_values - baseline_value = benchmark_data[baseline_name] - current_value = benchmark_data[run_name] - unit = benchmark_map[benchmark_label].metadata.unit - - tooltip_labels = [ - f"Run: {run_name}\n" - f"Value: {current_value:.2f} {unit}\n" - f"Normalized to ({baseline_name}): {baseline_value:.2f} {unit}\n" - f"Normalized: {value:.1f}%" - ] - tooltip = mpld3.plugins.LineHTMLTooltip(rect, tooltip_labels, css='.mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}') - mpld3.plugins.connect(ax.figure, tooltip) - - return max_height - -def add_chart_elements(ax: plt.Axes, - group_benchmarks: list[str], - group_name: str, - max_height: float) -> None: - top_padding = max_height * 0.2 - ax.set_ylim(0, max_height + top_padding) - ax.set_ylabel('Performance relative to baseline (%)') - ax.set_title(f'Performance Comparison (Normalized to Baseline) - {group_name} Group') - ax.set_xticks([]) - - for idx, label in enumerate(group_benchmarks): - split_labels = format_benchmark_label(label) - for i, sublabel in enumerate(split_labels): - y_pos = max_height + (top_padding * 0.5) + 2 - (i * top_padding * 0.15) - ax.text(idx, y_pos, sublabel, - ha='center', - style='italic', - color='#666666') - - ax.grid(True, axis='y', alpha=0.2) - ax.legend(bbox_to_anchor=(1, 1), loc='upper left') - -def split_large_groups(benchmark_groups): - miscellaneous = [] - new_groups = defaultdict(list) - - split_happened = False - for group, labels in benchmark_groups.items(): - if len(labels) == 1: - miscellaneous.extend(labels) - elif len(labels) > 5: - split_happened = True - mid = len(labels) // 2 - new_groups[group] = labels[:mid] - new_groups[group + '_'] = labels[mid:] - else: - new_groups[group] = labels - - if miscellaneous: - new_groups['Miscellaneous'] = miscellaneous - - if split_happened: - return split_large_groups(new_groups) - else: - return new_groups - -def group_benchmark_labels(benchmark_labels): - benchmark_groups = defaultdict(list) - for label in benchmark_labels: - group = re.match(r'^[^_\s]+', label)[0] - benchmark_groups[group].append(label) - return split_large_groups(benchmark_groups) - -def create_normalized_bar_chart(benchmarks: list[BenchmarkSeries], baseline_name: str) -> list[str]: - latest_results = get_latest_results(benchmarks) - - run_names = sorted(list(set( - name for result in latest_results.values() - for name in result.run_values.keys() - ))) - - if baseline_name not in run_names: - return [] - - benchmark_labels = [b.label for b in benchmarks] - - benchmark_groups = group_benchmark_labels(benchmark_labels) - - html_charts = [] - - for group_name, group_benchmarks in benchmark_groups.items(): - plt.close('all') - non_baseline_runs = [n for n in run_names if n != baseline_name] - - if len(non_baseline_runs) == 0: - continue - - normalized_data = prepare_normalized_data( - latest_results, benchmarks, group_benchmarks, - non_baseline_runs, baseline_name - ) - - fig, ax = plt.subplots(figsize=(10, 6)) - max_height = create_bar_plot( - ax, normalized_data, group_benchmarks, non_baseline_runs, - latest_results, benchmarks, baseline_name - ) - add_chart_elements(ax, group_benchmarks, group_name, max_height) - - plt.tight_layout() - 
html_charts.append(mpld3.fig_to_html(fig)) - plt.close(fig) +class BenchmarkChart: + label: str + html: str - return html_charts +def tooltip_css() -> str: + return '.mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}' -def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str) -> str: +def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str) -> list[BenchmarkChart]: plt.close('all') num_benchmarks = len(benchmarks) if num_benchmarks == 0: - return + return [] - fig, axes = plt.subplots(num_benchmarks, 1, figsize=(10, max(4 * num_benchmarks, 30))) + html_charts = [] - if num_benchmarks == 1: - axes = [axes] + for _, benchmark in enumerate(benchmarks): + fig, ax = plt.subplots(figsize=(10, 4)) - for idx, benchmark in enumerate(benchmarks): - ax = axes[idx] + all_values = [] + all_stddevs = [] for run in benchmark.runs: sorted_points = sorted(run.results, key=lambda x: x.date) dates = [point.date for point in sorted_points] values = [point.value for point in sorted_points] + stddevs = [point.stddev for point in sorted_points] + + all_values.extend(values) + all_stddevs.extend(stddevs) - ax.plot_date(dates, values, '-', label=run.name, alpha=0.5) + ax.errorbar(dates, values, yerr=stddevs, fmt='-', label=run.name, alpha=0.5) scatter = ax.scatter(dates, values, picker=True) tooltip_labels = [ f"Date: {point.date.strftime('%Y-%m-%d %H:%M:%S')}\n" - f"Value: {point.value:.2f}\n" + f"Value: {point.value:.2f} {benchmark.metadata.unit}\n" + f"Stddev: {point.stddev:.2f} {benchmark.metadata.unit}\n" f"Git Hash: {point.git_hash}" for point in sorted_points ] @@ -258,10 +70,17 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str for point in sorted_points] tooltip = mpld3.plugins.PointHTMLTooltip(scatter, tooltip_labels, - css='.mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}', + css=tooltip_css(), targets=targets) mpld3.plugins.connect(fig, tooltip) + # This is so that the stddev doesn't fill the entire y axis on the chart + if all_values and all_stddevs: + max_value = max(all_values) + min_value = min(all_values) + max_stddev = max(all_stddevs) + ax.set_ylim(min_value - 3 * max_stddev, max_value + 3 * max_stddev) + ax.set_title(benchmark.label, pad=20) performance_indicator = "lower is better" if benchmark.metadata.lower_is_better else "higher is better" ax.text(0.5, 1.05, f"({performance_indicator})", @@ -277,13 +96,118 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str ax.grid(True, alpha=0.2) ax.legend(bbox_to_anchor=(1, 1), loc='upper left') ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter('%Y-%m-%d %H:%M:%S')) - ax.xaxis.set_major_locator(mdates.AutoDateLocator()) - plt.tight_layout() - html = mpld3.fig_to_html(fig) + plt.tight_layout() + html_charts.append(BenchmarkChart(html=mpld3.fig_to_html(fig), label=benchmark.label)) + plt.close(fig) + + return html_charts + +@dataclass +class ExplicitGroup: + name: str + nnames: int + metadata: BenchmarkMetadata + runs: dict[str, dict[str, Result]] + +def create_explicit_groups(benchmark_runs: list[BenchmarkRun], compare_names: list[str]) -> list[ExplicitGroup]: + groups = {} + counted = {} + + for run in benchmark_runs: + if run.name in compare_names: + for res in run.results: + if res.explicit_group != '': + if res.explicit_group not in groups: + counted[res.explicit_group] = 1 + + 
groups[res.explicit_group] = ExplicitGroup(name=res.explicit_group, nnames=len(compare_names), + metadata=BenchmarkMetadata(unit=res.unit, lower_is_better=res.lower_is_better), + runs={}) + else: + counted[res.explicit_group] = counted[res.explicit_group] + 1 + + group = groups[res.explicit_group] + if res.label not in group.runs: + group.runs[res.label] = {name: None for name in compare_names} + + if group.runs[res.label][run.name] is None: + group.runs[res.label][run.name] = res - plt.close(fig) - return html + for key, val in counted.items(): + if val == 1: + groups.pop(key) + + return list(groups.values()) + +def create_grouped_bar_charts(groups: list[ExplicitGroup]) -> list[BenchmarkChart]: + plt.close('all') + + html_charts = [] + + for group in groups: + fig, ax = plt.subplots(figsize=(10, 6)) + + x = np.arange(group.nnames) + x_labels = [] + width = 0.8 / len(group.runs) + + max_height = 0 + + for i, (run_name, run_results) in enumerate(group.runs.items()): + offset = width * i + + positions = x + offset + x_labels = run_results.keys() + valid_data = [r.value if r is not None else 0 for r in run_results.values()] + rects = ax.bar(positions, valid_data, width, label=run_name) + # This is a hack to disable all bar_label. Setting labels to empty doesn't work. + # We create our own labels below for each bar, this works better in mpld3. + ax.bar_label(rects, fmt='') + + for rect, run, res in zip(rects, run_results.keys(), run_results.values()): + height = rect.get_height() + if height > max_height: + max_height = height + + ax.text(rect.get_x() + rect.get_width()/2., height + 2, + f'{res.value:.1f}', + ha='center', va='bottom', fontsize=9) + + tooltip_labels = [ + f"Run: {run}\n" + f"Label: {res.label}\n" + f"Value: {res.value:.2f} {res.unit}\n" + ] + tooltip = mpld3.plugins.LineHTMLTooltip(rect, tooltip_labels, css=tooltip_css()) + mpld3.plugins.connect(ax.figure, tooltip) + + ax.set_xticks([]) + ax.grid(True, axis='y', alpha=0.2) + ax.set_ylabel(f"Value ({group.metadata.unit})") + ax.legend(loc='upper left') + ax.set_title(group.name, pad=20) + performance_indicator = "lower is better" if group.metadata.lower_is_better else "higher is better" + ax.text(0.5, 1.03, f"({performance_indicator})", + ha='center', + transform=ax.transAxes, + style='italic', + fontsize=7, + color='#666666') + + for idx, label in enumerate(x_labels): + # this is a hack to get labels to show above the legend + # we normalize the idx to transAxes transform and offset it a little. + x_norm = (idx + 0.3 - ax.get_xlim()[0]) / (ax.get_xlim()[1] - ax.get_xlim()[0]) + ax.text(x_norm, 1.00, label, + transform=ax.transAxes, + color='#666666') + + plt.tight_layout() + html_charts.append(BenchmarkChart(label=group.name, html=mpld3.fig_to_html(fig))) + plt.close(fig) + + return html_charts def process_benchmark_data(benchmark_runs: list[BenchmarkRun], compare_names: list[str]) -> list[BenchmarkSeries]: benchmark_metadata: dict[str, BenchmarkMetadata] = {} @@ -319,12 +243,15 @@ def process_benchmark_data(benchmark_runs: list[BenchmarkRun], compare_names: li return benchmark_series def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_names: list[str]) -> str: - baseline_name = compare_names[0] benchmarks = process_benchmark_data(benchmark_runs, compare_names) - comparison_html_charts = create_normalized_bar_chart(benchmarks, baseline_name) - timeseries_html = create_time_series_chart(benchmarks, github_repo) - comparison_charts_html = '\n'.join(f'
<div class="chart">{chart}</div>
' for chart in comparison_html_charts) + timeseries = create_time_series_chart(benchmarks, github_repo) + timeseries_charts_html = '\n'.join(f'
<div class="chart" data-label="{ts.label}">{ts.html}</div>
' for ts in timeseries) + + explicit_groups = create_explicit_groups(benchmark_runs, compare_names) + + bar_charts = create_grouped_bar_charts(explicit_groups) + bar_charts_html = '\n'.join(f'
<div class="chart" data-label="{bc.label}">{bc.html}</div>
' for bc in bar_charts) html_template = f""" @@ -375,22 +302,106 @@ def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_ margin-bottom: 16px; }} }} + .filter-container {{ + text-align: center; + margin-bottom: 24px; + }} + .filter-container input {{ + padding: 8px; + font-size: 16px; + border: 1px solid #ccc; + border-radius: 4px; + width: 400px; + max-width: 100%; + }} + details {{ + margin-bottom: 24px; + }} + summary {{ + font-size: 18px; + font-weight: 500; + cursor: pointer; + padding: 12px; + background: #e9ecef; + border-radius: 8px; + user-select: none; + }} + summary:hover {{ + background: #dee2e6; + }} +

             <h1>Benchmark Results</h1>
-            <h2>Latest Results Comparison</h2>
-            <div class="charts">
-                {comparison_charts_html}
-            </div>
-            <h2>Historical Results</h2>
-            <div class="charts">
-                {timeseries_html}
-            </div>
+            <div class="filter-container">
+                <input type="text" id="bench-filter" placeholder="Filter benchmarks by name...">
+            </div>
+            <details>
+                <summary>Historical Results</summary>
+                <div class="charts">
+                    {timeseries_charts_html}
+                </div>
+            </details>
+            <details>
+                <summary>Comparisons</summary>
+                <div class="charts">
+                    {bar_charts_html}
+                </div>
+            </details>
         </div>
     </body>
     </html>
""" - return html_template diff --git a/scripts/benchmarks/utils/utils.py b/scripts/benchmarks/utils/utils.py index d077184e5c..0bb954fab2 100644 --- a/scripts/benchmarks/utils/utils.py +++ b/scripts/benchmarks/utils/utils.py @@ -3,6 +3,7 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +import gzip import os import shutil import subprocess @@ -58,7 +59,7 @@ def git_clone(dir, name, repo, commit): return repo_path def prepare_bench_cwd(dir): - # we need 2 deep to workaround a problem with a fixed relative path in cudaSift + # we need 2 deep to workaround a problem with a fixed relative paths in some velocity benchmarks options.benchmark_cwd = os.path.join(dir, 'bcwd', 'bcwd') if os.path.exists(options.benchmark_cwd): shutil.rmtree(options.benchmark_cwd) @@ -97,7 +98,7 @@ def create_build_path(directory, name): return build_path -def download(dir, url, file, untar = False): +def download(dir, url, file, untar = False, unzip = False): data_file = os.path.join(dir, file) if not Path(data_file).exists(): print(f"{data_file} does not exist, downloading") @@ -106,6 +107,10 @@ def download(dir, url, file, untar = False): file = tarfile.open(data_file) file.extractall(dir) file.close() + if unzip: + [stripped_gz, _] = os.path.splitext(data_file) + with gzip.open(data_file, 'rb') as f_in, open(stripped_gz, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) else: print(f"{data_file} exists, skipping...") return data_file