diff --git a/.github/scripts/get_system_info.sh b/.github/scripts/get_system_info.sh new file mode 100755 index 0000000000..6ca38a33ef --- /dev/null +++ b/.github/scripts/get_system_info.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash + +# Copyright (C) 2023 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# get_system_info.sh - Script for printing system info + +function check_L0_version { + if command -v dpkg &> /dev/null; then + dpkg -l | grep level-zero && return + fi + + if command -v rpm &> /dev/null; then + rpm -qa | grep level-zero && return + fi + + if command -v zypper &> /dev/null; then + zypper se level-zero && return + fi + + echo "level-zero not installed" +} + +function system_info { + echo "**********system_info**********" + cat /etc/os-release | grep -oP "PRETTY_NAME=\K.*" + cat /proc/version + echo "**********SYCL-LS**********" + source /opt/intel/oneapi/setvars.sh + sycl-ls + echo "**********VGA**********" + lspci | grep VGA + echo "**********CUDA Version**********" + if command -v nvidia-smi &> /dev/null; then + nvidia-smi + else + echo "CUDA not installed" + fi + echo "**********L0 Version**********" + check_L0_version + echo "**********ROCm Version**********" + if command -v rocminfo &> /dev/null; then + rocminfo + else + echo "ROCm not installed" + fi + echo "**********/proc/cmdline**********" + cat /proc/cmdline + echo "**********CPU info**********" + lscpu + echo "**********/proc/meminfo**********" + cat /proc/meminfo + echo "**********build/bin/urinfo**********" + $(dirname "$(readlink -f "$0")")/../../build/bin/urinfo || true + echo "******OpenCL*******" + # The driver version of OpenCL Graphics is the compute-runtime version + clinfo || echo "OpenCL not installed" + echo "**********list-environment**********" + echo "PATH=$PATH" + echo + echo "CPATH=$CPATH" + echo + echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" 
+ echo + echo "LIBRARY_PATH=$LIBRARY_PATH" + echo + echo "PKG_CONFIG_PATH=$PKG_CONFIG_PATH" + echo + echo "******list-build-system-versions*******" + gcc --version 2>/dev/null || true + echo + clang --version 2>/dev/null || true + echo + make --version 2>/dev/null || true + echo "**********/proc/modules**********" + cat /proc/modules + echo "***************installed-packages***************" + # Instructions below will return some minor errors, as they are dependent on the Linux distribution. + zypper se --installed-only 2>/dev/null || true + apt list --installed 2>/dev/null || true + yum list installed 2>/dev/null || true +} + +# Call the function above to print system info. +system_info diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 780f142f33..953c9fb024 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -163,9 +163,10 @@ jobs: strategy: matrix: adapter: [ - {name: CUDA, triplet: nvptx64-nvidia-cuda}, - {name: HIP, triplet: amdgcn-amd-amdhsa}, - {name: L0, triplet: spir64} + {name: CUDA, triplet: nvptx64-nvidia-cuda, platform: ""}, + {name: HIP, triplet: amdgcn-amd-amdhsa, platform: ""}, + {name: L0, triplet: spir64, platform: ""}, + {name: OPENCL, triplet: spir64, platform: "Intel(R) OpenCL"} ] build_type: [Debug, Release] compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: clang++}] @@ -219,7 +220,11 @@ jobs: - name: Test adapters if: matrix.adapter.name != 'L0' working-directory: ${{github.workspace}}/build - run: ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" --timeout 180 + run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" --timeout 180 + + - name: Get information about platform + if: ${{ always() }} + run: .github/scripts/get_system_info.sh examples-build-hw: name: Build - examples on HW @@ -273,6 +278,10 @@ jobs: cat ${HOME}/.profile || true rm ${HOME}/.profile || true + - name: Get information about 
platform + if: ${{ always() }} + run: .github/scripts/get_system_info.sh + windows-build: name: Build - Windows strategy: @@ -282,12 +291,18 @@ jobs: {name: None, var: ''}, {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'} ] - # TODO: building level zero loader on windows-2019 is currently broken + # TODO: building level zero loader on windows-2019 and clang-cl is currently broken exclude: - os: 'windows-2019' adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'} + - adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'} + compiler: {c: clang-cl, cxx: clang-cl} + build_type: [Debug, Release] - compiler: [{c: cl.exe, cxx: cl.exe}, {c: clang-cl.exe, cxx: clang-cl.exe}] + compiler: [{c: cl, cxx: cl}, {c: clang-cl, cxx: clang-cl}] + include: + - compiler: {c: clang-cl, cxx: clang-cl} + toolset: "-T ClangCL" runs-on: ${{matrix.os}} steps: @@ -300,10 +315,18 @@ jobs: - name: Install prerequisites run: python3 -m pip install -r third_party/requirements.txt + - name: Install doxygen + run: | + $WorkingDir = $PWD.Path + Invoke-WebRequest -Uri https://github.com/doxygen/doxygen/releases/download/Release_1_9_8/doxygen-1.9.8.windows.x64.bin.zip -OutFile "$WorkingDir\doxygen.zip" + Expand-Archive -Path "$WorkingDir\doxygen.zip" + Add-Content $env:GITHUB_PATH "$WorkingDir\doxygen" + - name: Configure CMake run: > cmake -B${{github.workspace}}/build + ${{matrix.toolset}} -DCMAKE_C_COMPILER=${{matrix.compiler.c}} -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} -DCMAKE_POLICY_DEFAULT_CMP0094=NEW @@ -319,7 +342,7 @@ jobs: # run: cmake --build ${{github.workspace}}/build --target check-generated --config ${{matrix.build_type}} - name: Build all - run: cmake --build ${{github.workspace}}/build --config ${{matrix.build_type}} -j 2 + run: cmake --build ${{github.workspace}}/build --config ${{matrix.build_type}} -j $Env:NUMBER_OF_PROCESSORS - name: Test working-directory: ${{github.workspace}}/build diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml new file mode 100644 
index 0000000000..ad1ac23e7a --- /dev/null +++ b/.github/workflows/coverage.yml @@ -0,0 +1,78 @@ +name: Coverage + +on: [push, pull_request] + +jobs: + ubuntu-build: + name: Build - Ubuntu + strategy: + matrix: + os: ['ubuntu-22.04'] + build_type: [Debug] + compiler: [{c: gcc, cxx: g++}] + libbacktrace: ['-DVAL_USE_LIBBACKTRACE_BACKTRACE=ON'] + pool_tracking: ['-DUMF_ENABLE_POOL_TRACKING=ON'] + + runs-on: ${{matrix.os}} + + steps: + - uses: actions/checkout@v3 + + - name: Install apt packages + run: | + sudo apt-get update + sudo apt-get install -y doxygen ${{matrix.compiler.c}} + + - name: Install pip packages + run: pip install -r third_party/requirements.txt + + - name: Install libbacktrace + if: matrix.libbacktrace == '-DVAL_USE_LIBBACKTRACE_BACKTRACE=ON' + run: | + git clone https://github.com/ianlancetaylor/libbacktrace.git + cd libbacktrace + ./configure + make + sudo make install + cd .. + + - name: Download DPC++ + run: | + sudo apt install libncurses5 + wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/sycl-nightly%2F20230626/dpcpp-compiler.tar.gz + tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz + + - name: Configure CMake + run: > + cmake + -B${{github.workspace}}/build + -DCMAKE_C_COMPILER=${{matrix.compiler.c}} + -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} + -DUR_ENABLE_TRACING=ON + -DUR_DEVELOPER_MODE=ON + -DCMAKE_BUILD_TYPE=${{matrix.build_type}} + -DUR_BUILD_TESTS=ON + -DUR_FORMAT_CPP_STYLE=ON + -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++ + -DCMAKE_CXX_FLAGS="--coverage -fkeep-inline-functions -fkeep-static-functions" + -DCMAKE_EXE_LINKER_FLAGS="--coverage" + -DCMAKE_SHARED_LINKER_FLAGS="--coverage" + ${{matrix.libbacktrace}} + ${{matrix.pool_tracking}} + + - name: Build + run: cmake --build ${{github.workspace}}/build -j $(nproc) + + - name: Test + working-directory: ${{github.workspace}}/build + run: ctest -C ${{matrix.build_type}} --output-on-failure -L 
"python|umf|loader|validation|tracing|unit|urtrace" + + - name: Quick Coverage Info + working-directory: ${{github.workspace}}/build + run: ctest -T Coverage + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 + with: + gcov: true + gcov_include: source diff --git a/.github/workflows/e2e_nightly.yml b/.github/workflows/e2e_nightly.yml new file mode 100644 index 0000000000..eebb1f7bfa --- /dev/null +++ b/.github/workflows/e2e_nightly.yml @@ -0,0 +1,118 @@ +name: E2E Nightly + +on: + schedule: + # Run every day at 23:00 UTC + - cron: '0 23 * * *' + +jobs: + e2e-build-hw: + name: Build SYCL, UR, run E2E + strategy: + matrix: + adapter: [ + {name: CUDA} + ] + build_type: [Release] + compiler: [{c: clang, cxx: clang++}] + + runs-on: ${{matrix.adapter.name}} + + steps: + # Workspace on self-hosted runners is not cleaned automatically. + # We have to delete the files created outside of using actions. + - name: Cleanup self-hosted workspace + if: always() + run: | + ls -la ./ + rm -rf ./* || true + + - name: Checkout UR + uses: actions/checkout@v4 + with: + path: ur-repo + + - name: Checkout SYCL + uses: actions/checkout@v4 + with: + repository: intel/llvm + ref: sycl + path: sycl-repo + + - name: Install pip packages + working-directory: ${{github.workspace}}/ur-repo + run: pip install -r third_party/requirements.txt + + - name: Configure CMake UR + working-directory: ${{github.workspace}}/ur-repo + run: > + cmake + -B build + -DCMAKE_C_COMPILER=${{matrix.compiler.c}} + -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} + -DCMAKE_BUILD_TYPE=${{matrix.build_type}} + -DUR_ENABLE_TRACING=ON + -DUR_DEVELOPER_MODE=ON + -DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON + + - name: Build UR + run: LD_LIBRARY_PATH=${{github.workspace}}/dpcpp_compiler/lib + cmake --build ${{github.workspace}}/ur-repo/build -j $(nproc) + + - name: Set env vars & pre setup + run: | + echo "SYCL_PREFER_UR=1" >> $GITHUB_ENV + echo "CUDA_LIB_PATH=/usr/local/cuda/lib64/stubs" >> $GITHUB_ENV 
+ echo "LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV + source /opt/intel/oneapi/setvars.sh + sycl-ls + + - name: Configure SYCL + run: > + python3 sycl-repo/buildbot/configure.py + -t ${{matrix.build_type}} + -o ${{github.workspace}}/sycl_build + --cmake-gen "Unix Makefiles" + --ci-defaults --cuda --hip + --cmake-opt="-DLLVM_INSTALL_UTILS=ON" + --cmake-opt="-DSYCL_PI_TESTS=OFF" + --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache + --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache + + - name: Build SYCL + run: cmake --build ${{github.workspace}}/sycl_build + + - name: Run check-sycl + # Remove after fixing SYCL test :: abi/layout_handler.cpp + # This issue does not affect further execution of e2e with UR. + continue-on-error: true + run: cmake --build ${{github.workspace}}/sycl_build --target check-sycl + + - name: Swap UR loader and adapters + run: | + cp ${{github.workspace}}/ur-repo/build/lib/libur_loader.so* ${{github.workspace}}/sycl_build/lib/ + cp ${{github.workspace}}/ur-repo/build/lib/libur_adapter_cuda.so* ${{github.workspace}}/sycl_build/lib/ + + - name: Set additional env. 
vars + run: | + echo "${{github.workspace}}/sycl_build/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=${{github.workspace}}/sycl_build/lib:$LD_LIBRARY_PATH" >> $GITHUB_ENV + + # Running (newly built) sycl-ls sets up some extra variables + - name: Setup SYCL variables + run: | + which clang++ sycl-ls + SYCL_PI_TRACE=-1 sycl-ls + + - name: Build e2e tests + run: > + cmake + -GNinja + -B ${{github.workspace}}/build-e2e/ + -S ${{github.workspace}}/sycl-repo/sycl/test-e2e/ + -DSYCL_TEST_E2E_TARGETS="ext_oneapi_cuda:gpu" + -DCMAKE_CXX_COMPILER="$(which clang++)" + -DLLVM_LIT="${{github.workspace}}/sycl-repo/llvm/utils/lit/lit.py" + + - name: Run e2e tests + run: ninja -C build-e2e check-sycl-e2e diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml new file mode 100644 index 0000000000..4a81c94e8f --- /dev/null +++ b/.github/workflows/nightly.yml @@ -0,0 +1,50 @@ +name: Nightly + +on: + schedule: + # Run every day at 23:00 UTC + - cron: '0 23 * * *' + +jobs: + long-fuzz-test: + name: Run long fuzz tests + strategy: + matrix: + build_type: [Debug, Release] + compiler: [{c: clang, cxx: clang++}] + + runs-on: 'ubuntu-22.04' + + steps: + - uses: actions/checkout@v3 + + - name: Install pip packages + run: pip install -r third_party/requirements.txt + + - name: Download DPC++ + run: | + wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2023-08-31/sycl_linux.tar.gz + mkdir dpcpp_compiler + tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C dpcpp_compiler + + - name: Configure CMake + run: > + cmake + -B${{github.workspace}}/build + -DCMAKE_C_COMPILER=${{matrix.compiler.c}} + -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} + -DUR_ENABLE_TRACING=ON + -DCMAKE_BUILD_TYPE=${{matrix.build_type}} + -DUR_BUILD_TESTS=ON + -DUR_USE_ASAN=ON + -DUR_USE_UBSAN=ON + -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++ + + - name: Build + run: > + LD_LIBRARY_PATH=${{github.workspace}}/dpcpp_compiler/lib + 
cmake --build ${{github.workspace}}/build -j $(nproc) + + - name: Fuzz long test + working-directory: ${{github.workspace}}/build + run: ctest -C ${{matrix.build_type}} --output-on-failure -L "fuzz-long" diff --git a/CMakeLists.txt b/CMakeLists.txt index fcf19a65bb..80a9f64ea7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR) -project(unified-runtime VERSION 0.7.0) +project(unified-runtime VERSION 0.9.0) include(GNUInstallDirs) include(CheckCXXSourceCompiles) @@ -15,7 +15,8 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") include(helpers) if(CMAKE_SYSTEM_NAME STREQUAL Darwin) - set(CMAKE_FIND_FRAMEWORK NEVER) + set(Python3_FIND_FRAMEWORK NEVER) + set(Python3_FIND_STRATEGY LOCATION) endif() find_package(Python3 COMPONENTS Interpreter REQUIRED) @@ -34,12 +35,13 @@ option(UR_USE_MSAN "enable MemorySanitizer" OFF) option(UR_USE_TSAN "enable ThreadSanitizer" OFF) option(UR_ENABLE_TRACING "enable api tracing through xpti" OFF) option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) -option(UMF_ENABLE_POOL_TRACKING "Build UMF with pool tracking" OFF) -option(UR_BUILD_ADAPTER_L0 "build level 0 adapter from SYCL" OFF) -option(UR_BUILD_ADAPTER_OPENCL "build opencl adapter from SYCL" OFF) -option(UR_BUILD_ADAPTER_CUDA "build cuda adapter from SYCL" OFF) -option(UR_BUILD_ADAPTER_HIP "build hip adapter from SYCL" OFF) -option(UR_BUILD_ADAPTER_NATIVE_CPU "build native_cpu adapter from SYCL" OFF) +option(UMF_ENABLE_POOL_TRACKING "Build UMF with pool tracking" ON) +option(UR_BUILD_ADAPTER_L0 "Build the Level-Zero adapter" OFF) +option(UR_BUILD_ADAPTER_OPENCL "Build the OpenCL adapter" OFF) +option(UR_BUILD_ADAPTER_CUDA "Build the CUDA adapter" OFF) +option(UR_BUILD_ADAPTER_HIP "Build the HIP adapter" OFF) +option(UR_BUILD_ADAPTER_NATIVE_CPU "Build the Native-CPU adapter" OFF) +option(UR_BUILD_ADAPTER_ALL "Build all currently 
supported adapters" OFF) option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF) option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF) set(UR_DPCXX "" CACHE FILEPATH "Path of the DPC++ compiler executable") diff --git a/README.md b/README.md index 56ef7afb52..5eea3c7570 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ [![CodeQL](https://github.com/oneapi-src/unified-runtime/actions/workflows/codeql.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/codeql.yml) [![Bandit](https://github.com/oneapi-src/unified-runtime/actions/workflows/bandit.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/bandit.yml) [![Coverity](https://scan.coverity.com/projects/28213/badge.svg)](https://scan.coverity.com/projects/oneapi-src-unified-runtime) +[![codecov.io](https://codecov.io/github/oneapi-src/unified-runtime/coverage.svg?branch=main)](https://codecov.io/github/oneapi-src/unified-runtime?branch=main) ## Adapters Adapter implementations for Unified Runtime currently reside in the [SYCL repository](https://github.com/intel/llvm/tree/sycl/sycl/plugins/unified_runtime/ur). This branch contains scripts to automatically @@ -31,7 +32,7 @@ see cmake options for details. - [Adapter naming convention](#adapter-naming-convention) - [Source code generation](#source-code-generation) - [Documentation](#documentation) - +6. [Release Process](#release-process) ## Contents of the repo @@ -80,6 +81,12 @@ Tools can be acquired via instructions in [third_party](/third_party/README.md). ## Building +The requirements and instructions below are for building the project from source +without any modifications. To make modifications to the specification, please +see the +[Contribution Guide](https://oneapi-src.github.io/unified-runtime/core/CONTRIB.html) +for more detailed instructions on the correct setup. 
+ ### Requirements Required packages: @@ -87,9 +94,6 @@ Required packages: - [CMake](https://cmake.org/) >= 3.14.0 - Python v3.6.6 or later -For development and contributions: -- clang-format-15.0 (can be installed with `python -m pip install clang-format==15.0.7`) - ### Windows Generating Visual Studio Project. EXE and binaries will be in **build/bin/{build_config}** @@ -127,12 +131,13 @@ List of options provided by CMake: | UR_USE_MSAN | Enable MemorySanitizer (clang only) | ON/OFF | OFF | | UR_ENABLE_TRACING | Enable XPTI-based tracing layer | ON/OFF | OFF | | UR_CONFORMANCE_TARGET_TRIPLES | SYCL triples to build CTS device binaries for | Comma-separated list | spir64 | -| UR_BUILD_ADAPTER_L0 | Fetch and use level-zero adapter from SYCL | ON/OFF | OFF | -| UR_BUILD_ADAPTER_OPENCL | Fetch and use opencl adapter from SYCL | ON/OFF | OFF | -| UR_BUILD_ADAPTER_CUDA | Fetch and use cuda adapter from SYCL | ON/OFF | OFF | -| UR_BUILD_ADAPTER_HIP | Fetch and use hip adapter from SYCL | ON/OFF | OFF | -| UR_BUILD_ADAPTER_NATIVE_CPU | Fetch and use native-cpu adapter from SYCL | ON/OFF | OFF | -| UR_HIP_PLATFORM | Build hip adapter for AMD or NVIDIA platform | AMD/NVIDIA | AMD | +| UR_BUILD_ADAPTER_L0 | Build the Level-Zero adapter | ON/OFF | OFF | +| UR_BUILD_ADAPTER_OPENCL | Build the OpenCL adapter | ON/OFF | OFF | +| UR_BUILD_ADAPTER_CUDA | Build the CUDA adapter | ON/OFF | OFF | +| UR_BUILD_ADAPTER_HIP | Build the HIP adapter | ON/OFF | OFF | +| UR_BUILD_ADAPTER_NATIVE_CPU | Build the Native-CPU adapter | ON/OFF | OFF | +| UR_BUILD_ADAPTER_ALL | Build all currently supported adapters | ON/OFF | OFF | +| UR_HIP_PLATFORM | Build HIP adapter for AMD or NVIDIA platform | AMD/NVIDIA | AMD | | UR_ENABLE_COMGR | Enable comgr lib usage | AMD/NVIDIA | AMD | | UR_DPCXX | Path of the DPC++ compiler executable to build CTS device binaries | File path | `""` | | UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` 
| @@ -154,6 +159,10 @@ It will generate the source code **and** run automated code formatting: $ make generate ``` +This target has additional dependencies which are described in the *Build +Environment* section of the +[Contribution Guide](https://oneapi-src.github.io/unified-runtime/core/CONTRIB.html). + ## Contributions For those who intend to make a contribution to the project please read our @@ -176,3 +185,26 @@ Code is generated using included [Python scripts](/scripts/README.md). Documentation is generated from source code using Sphinx - see [scripts dir](/scripts/README.md) for details. + +## Release Process + +Unified Runtime releases are aligned with oneAPI releases. Once all changes +planned for a release have been accepted, the release process is defined as: + +1. Create a new release branch based on the [main][main-branch] branch taking + the form `v<major>.<minor>.x` where `x` is a placeholder for the patch + version. This branch will always contain the latest patch version for a given + release. +2. Create a PR to increment the CMake project version on the [main][main-branch] + branch and merge it before accepting any other changes. +3. Create a new tag based on the latest commit on the release branch taking the + form `v<major>.<minor>.<patch>`. +4. Create a [new GitHub release][new-github-release] using the tag created in + the previous step. + * Prior to version 1.0, check the *Set as a pre-release* tick box. +5. Update downstream projects to utilize the release tag. If any issues arise + from integration, apply any necessary hot fixes to the `v<major>.<minor>.x` + branch and go back to step 3. 
+ +[main-branch]: https://github.com/oneapi-src/unified-runtime/tree/main +[new-github-release]: https://github.com/oneapi-src/unified-runtime/releases/new diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index 3c90d41236..35c4789432 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -63,6 +63,8 @@ function(add_ur_target_compile_options name) -fPIC -Wall -Wpedantic + -Wempty-body + -Wunused-parameter $<$<CXX_COMPILER_ID:GNU>:-fdiagnostics-color=always> $<$<CXX_COMPILER_ID:Clang,AppleClang>:-fcolor-diagnostics> ) @@ -78,7 +80,7 @@ function(add_ur_target_compile_options name) endif() elseif(MSVC) target_compile_options(${name} PRIVATE - /MP + $<$<CXX_COMPILER_ID:MSVC>:/MP> # clang-cl.exe does not support /MP /W3 /MD$<$<CONFIG:Debug>:d> /GS diff --git a/examples/codegen/codegen.cpp b/examples/codegen/codegen.cpp index 203043d86d..82834688fb 100644 --- a/examples/codegen/codegen.cpp +++ b/examples/codegen/codegen.cpp @@ -24,7 +24,7 @@ constexpr unsigned PAGE_SIZE = 4096; void ur_check(const ur_result_t r) { if (r != UR_RESULT_SUCCESS) { - urTearDown(nullptr); + urLoaderTearDown(); throw std::runtime_error("Unified runtime error: " + std::to_string(r)); } } @@ -95,7 +95,7 @@ template <typename T, size_t N> struct alignas(PAGE_SIZE) AlignedArray { int main() { ur_loader_config_handle_t loader_config = nullptr; - ur_check(urInit(UR_DEVICE_INIT_FLAG_GPU, loader_config)); + ur_check(urLoaderInit(UR_DEVICE_INIT_FLAG_GPU, loader_config)); auto adapters = get_adapters(); auto supported_adapters = get_supported_adapters(adapters); @@ -172,5 +172,5 @@ int main() { std::cout << "Results are incorrect." << std::endl; } - return urTearDown(nullptr) == UR_RESULT_SUCCESS && expectedResult ? 0 : 1; + return urLoaderTearDown() == UR_RESULT_SUCCESS && expectedResult ? 
0 : 1; } diff --git a/examples/collector/README.md b/examples/collector/README.md index aaf5eed32a..fbdf18a8ae 100644 --- a/examples/collector/README.md +++ b/examples/collector/README.md @@ -19,7 +19,7 @@ $ mkdir build $ cd build $ cmake .. 
-DUR_ENABLE_TRACING=ON $ make -$ UR_ADAPTERS_FORCE_LOAD=./lib/libur_adapter_null.so XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=./lib/libxptifw.so XPTI_SUBSCRIBERS=./lib/libcollector.so ./bin/hello_world +$ UR_ADAPTERS_FORCE_LOAD=./lib/libur_adapter_null.so UR_ENABLE_LAYERS=UR_LAYER_TRACING XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=./lib/libxptifw.so XPTI_SUBSCRIBERS=./lib/libcollector.so ./bin/hello_world ``` See [XPTI framework documentation](https://github.com/intel/llvm/blob/sycl/xptifw/doc/XPTI_Framework.md) for more information. diff --git a/examples/collector/collector.cpp b/examples/collector/collector.cpp index 6f2f6d57b1..910964e02c 100644 --- a/examples/collector/collector.cpp +++ b/examples/collector/collector.cpp @@ -34,15 +34,23 @@ constexpr uint16_t TRACE_FN_END = constexpr std::string_view UR_STREAM_NAME = "ur"; /** - * @brief Formats the function parameters and arguments for urInit + * @brief Formats the function parameters and arguments for urAdapterGet */ std::ostream &operator<<(std::ostream &os, - const struct ur_init_params_t *params) { - os << ".device_flags = "; - if (*params->pdevice_flags & UR_DEVICE_INIT_FLAG_GPU) { - os << "UR_DEVICE_INIT_FLAG_GPU"; - } else { - os << "0"; + const struct ur_adapter_get_params_t *params) { + os << ".NumEntries = "; + os << *params->pNumEntries; + os << ", "; + os << ".phAdapters = "; + os << *params->pphAdapters; + if (*params->pphAdapters) { + os << " (" << **params->pphAdapters << ")"; + } + os << ", "; + os << ".pNumAdapters = "; + os << *params->ppNumAdapters; + if (*params->ppNumAdapters) { + os << " (" << **params->ppNumAdapters << ")"; } os << ""; return os; @@ -50,16 +58,17 @@ std::ostream &operator<<(std::ostream &os, /** * A map of functions that format the parameters and arguments for each UR function. 
- * This example only implements a handler for one function, `urInit`, but it's + * This example only implements a handler for one function, `urAdapterGet`, but it's * trivial to expand it to support more. */ static std::unordered_map< std::string_view, std::function<void(const xpti::function_with_args_t *, std::ostream &)>> - handlers = {{"urInit", [](const xpti::function_with_args_t *fn_args, - std::ostream &os) { - auto params = static_cast<const struct ur_init_params_t *>( - fn_args->args_data); + handlers = {{"urAdapterGet", [](const xpti::function_with_args_t *fn_args, + std::ostream &os) { + auto params = + static_cast<const struct ur_adapter_get_params_t *>( + fn_args->args_data); os << params; }}}; @@ -73,10 +82,9 @@ static std::unordered_map< * On begin, it prints the function declaration with the call arguments specified, * and on end it prints the function name with the result of the call. */ -XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, - xpti::trace_event_data_t *parent, - xpti::trace_event_data_t *event, - uint64_t instance, const void *user_data) { +XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, xpti::trace_event_data_t *, + xpti::trace_event_data_t *, uint64_t instance, + const void *user_data) { auto *args = static_cast<const xpti::function_with_args_t *>(user_data); std::ostringstream out; if (trace_type == TRACE_FN_BEGIN) { @@ -110,8 +118,7 @@ XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, * selected trace types. */ XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version, - unsigned int minor_version, - const char *version_str, + unsigned int minor_version, const char *, const char *stream_name) { if (stream_name == nullptr) { std::cout << "Stream name not provided. Aborting." << std::endl; @@ -149,5 +156,5 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version, * * Can be used to cleanup state or resources. 
*/ -XPTI_CALLBACK_API void xptiTraceFinish(const char *stream_name) { /* noop */ +XPTI_CALLBACK_API void xptiTraceFinish(const char *) { /* noop */ } diff --git a/examples/hello_world/hello_world.cpp b/examples/hello_world/hello_world.cpp index 4d903da65a..904ac6d2ef 100644 --- a/examples/hello_world/hello_world.cpp +++ b/examples/hello_world/hello_world.cpp @@ -15,13 +15,14 @@ #include "ur_api.h" ////////////////////////////////////////////////////////////////////////// -int main(int argc, char *argv[]) { +int main(int, char *[]) { ur_result_t status; // Initialize the platform - status = urInit(0, nullptr); + status = urLoaderInit(0, nullptr); if (status != UR_RESULT_SUCCESS) { - std::cout << "urInit failed with return code: " << status << std::endl; + std::cout << "urLoaderInit failed with return code: " << status + << std::endl; return 1; } std::cout << "Platform initialized.\n"; @@ -119,6 +120,6 @@ int main(int argc, char *argv[]) { for (auto adapter : adapters) { urAdapterRelease(adapter); } - urTearDown(nullptr); + urLoaderTearDown(); return status == UR_RESULT_SUCCESS ? 
0 : 1; } diff --git a/include/ur.py b/include/ur.py index 45ce583f42..09b7955e07 100644 --- a/include/ur.py +++ b/include/ur.py @@ -6,7 +6,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @file ur.py - @version v0.7-r0 + @version v0.9-r0 """ import platform @@ -117,8 +117,6 @@ class ur_function_v(IntEnum): QUEUE_CREATE_WITH_NATIVE_HANDLE = 96 ## Enumerator for ::urQueueCreateWithNativeHandle QUEUE_FINISH = 97 ## Enumerator for ::urQueueFinish QUEUE_FLUSH = 98 ## Enumerator for ::urQueueFlush - INIT = 99 ## Enumerator for ::urInit - TEAR_DOWN = 100 ## Enumerator for ::urTearDown SAMPLER_CREATE = 101 ## Enumerator for ::urSamplerCreate SAMPLER_RETAIN = 102 ## Enumerator for ::urSamplerRetain SAMPLER_RELEASE = 103 ## Enumerator for ::urSamplerRelease @@ -144,9 +142,6 @@ class ur_function_v(IntEnum): COMMAND_BUFFER_FINALIZE_EXP = 123 ## Enumerator for ::urCommandBufferFinalizeExp COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP = 125 ## Enumerator for ::urCommandBufferAppendKernelLaunchExp COMMAND_BUFFER_ENQUEUE_EXP = 128 ## Enumerator for ::urCommandBufferEnqueueExp - COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP = 129 ## Enumerator for ::urCommandBufferAppendMemcpyUSMExp - COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP = 130 ## Enumerator for ::urCommandBufferAppendMembufferCopyExp - COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP = 131 ## Enumerator for ::urCommandBufferAppendMembufferCopyRectExp USM_PITCHED_ALLOC_EXP = 132 ## Enumerator for ::urUSMPitchedAllocExp BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP = 133## Enumerator for ::urBindlessImagesUnsampledImageHandleDestroyExp BINDLESS_IMAGES_SAMPLED_IMAGE_HANDLE_DESTROY_EXP = 134 ## Enumerator for ::urBindlessImagesSampledImageHandleDestroyExp @@ -182,10 +177,6 @@ class ur_function_v(IntEnum): USM_P2P_ENABLE_PEER_ACCESS_EXP = 165 ## Enumerator for ::urUsmP2PEnablePeerAccessExp USM_P2P_DISABLE_PEER_ACCESS_EXP = 166 ## Enumerator for ::urUsmP2PDisablePeerAccessExp USM_P2P_PEER_ACCESS_GET_INFO_EXP = 167 ## Enumerator for 
::urUsmP2PPeerAccessGetInfoExp - COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP = 168 ## Enumerator for ::urCommandBufferAppendMembufferWriteExp - COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP = 169 ## Enumerator for ::urCommandBufferAppendMembufferReadExp - COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP = 170## Enumerator for ::urCommandBufferAppendMembufferWriteRectExp - COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP = 171 ## Enumerator for ::urCommandBufferAppendMembufferReadRectExp LOADER_CONFIG_CREATE = 172 ## Enumerator for ::urLoaderConfigCreate LOADER_CONFIG_RELEASE = 173 ## Enumerator for ::urLoaderConfigRelease LOADER_CONFIG_RETAIN = 174 ## Enumerator for ::urLoaderConfigRetain @@ -199,6 +190,22 @@ class ur_function_v(IntEnum): PROGRAM_BUILD_EXP = 197 ## Enumerator for ::urProgramBuildExp PROGRAM_COMPILE_EXP = 198 ## Enumerator for ::urProgramCompileExp PROGRAM_LINK_EXP = 199 ## Enumerator for ::urProgramLinkExp + LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK = 200 ## Enumerator for ::urLoaderConfigSetCodeLocationCallback + LOADER_INIT = 201 ## Enumerator for ::urLoaderInit + LOADER_TEAR_DOWN = 202 ## Enumerator for ::urLoaderTearDown + COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP = 203 ## Enumerator for ::urCommandBufferAppendUSMMemcpyExp + COMMAND_BUFFER_APPEND_USM_FILL_EXP = 204 ## Enumerator for ::urCommandBufferAppendUSMFillExp + COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP = 205 ## Enumerator for ::urCommandBufferAppendMemBufferCopyExp + COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP = 206## Enumerator for ::urCommandBufferAppendMemBufferWriteExp + COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP = 207 ## Enumerator for ::urCommandBufferAppendMemBufferReadExp + COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP = 208## Enumerator for ::urCommandBufferAppendMemBufferCopyRectExp + COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP = 209 ## Enumerator for ::urCommandBufferAppendMemBufferWriteRectExp + COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP = 210## Enumerator for 
::urCommandBufferAppendMemBufferReadRectExp + COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP = 211 ## Enumerator for ::urCommandBufferAppendMemBufferFillExp + COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP = 212 ## Enumerator for ::urCommandBufferAppendUSMPrefetchExp + COMMAND_BUFFER_APPEND_USM_ADVISE_EXP = 213 ## Enumerator for ::urCommandBufferAppendUSMAdviseExp + ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP = 214 ## Enumerator for ::urEnqueueCooperativeKernelLaunchExp + KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP = 215## Enumerator for ::urKernelSuggestMaxCooperativeGroupCountExp class ur_function_t(c_int): def __str__(self): @@ -248,6 +255,8 @@ class ur_structure_type_v(IntEnum): EXP_INTEROP_SEMAPHORE_DESC = 0x2002 ## ::ur_exp_interop_semaphore_desc_t EXP_FILE_DESCRIPTOR = 0x2003 ## ::ur_exp_file_descriptor_t EXP_WIN32_HANDLE = 0x2004 ## ::ur_exp_win32_handle_t + EXP_LAYERED_IMAGE_PROPERTIES = 0x2005 ## ::ur_exp_layered_image_properties_t + EXP_SAMPLER_ADDR_MODES = 0x2006 ## ::ur_exp_sampler_addr_modes_t class ur_structure_type_t(c_int): def __str__(self): @@ -513,6 +522,24 @@ def __str__(self): return str(ur_loader_config_info_v(self.value)) +############################################################################### +## @brief Code location data +class ur_code_location_t(Structure): + _fields_ = [ + ("functionName", c_char_p), ## [in][out] Function name. + ("sourceFile", c_char_p), ## [in][out] Source code file. + ("lineNumber", c_ulong), ## [in][out] Source code line number. + ("columnNumber", c_ulong) ## [in][out] Source code column number. + ] + +############################################################################### +## @brief Code location callback with user data. 
+def ur_code_location_callback_t(user_defined_callback): + @CFUNCTYPE(ur_code_location_t, c_void_p) + def ur_code_location_callback_t_wrapper(pUserData): + return user_defined_callback(pUserData) + return ur_code_location_callback_t_wrapper + ############################################################################### ## @brief Supported adapter info class ur_adapter_info_v(IntEnum): @@ -573,7 +600,9 @@ def __str__(self): class ur_api_version_v(IntEnum): _0_6 = UR_MAKE_VERSION( 0, 6 ) ## version 0.6 _0_7 = UR_MAKE_VERSION( 0, 7 ) ## version 0.7 - CURRENT = UR_MAKE_VERSION( 0, 7 ) ## latest known version + _0_8 = UR_MAKE_VERSION( 0, 8 ) ## version 0.8 + _0_9 = UR_MAKE_VERSION( 0, 9 ) ## version 0.9 + CURRENT = UR_MAKE_VERSION( 0, 9 ) ## latest known version class ur_api_version_t(c_int): def __str__(self): @@ -2100,10 +2129,10 @@ class ur_event_native_properties_t(Structure): ############################################################################### ## @brief Event states for all events. class ur_execution_info_v(IntEnum): - EXECUTION_INFO_COMPLETE = 0 ## Indicates that the event has completed. - EXECUTION_INFO_RUNNING = 1 ## Indicates that the device has started processing this event. - EXECUTION_INFO_SUBMITTED = 2 ## Indicates that the event has been submitted by the host to the device. - EXECUTION_INFO_QUEUED = 3 ## Indicates that the event has been queued, this is the initial state of + COMPLETE = 0 ## Indicates that the event has completed. + RUNNING = 1 ## Indicates that the device has started processing this event. + SUBMITTED = 2 ## Indicates that the event has been submitted by the host to the device. + QUEUED = 3 ## Indicates that the event has been queued, this is the initial state of ## events. 
class ur_execution_info_t(c_int): @@ -2213,6 +2242,20 @@ class ur_exp_sampler_mip_properties_t(Structure): ("mipFilterMode", ur_sampler_filter_mode_t) ## [in] mipmap filter mode used for filtering between mipmap levels ] +############################################################################### +## @brief Describes unique sampler addressing mode per dimension +## +## @details +## - Specify these properties in ::urSamplerCreate via ::ur_sampler_desc_t +## as part of a `pNext` chain. +class ur_exp_sampler_addr_modes_t(Structure): + _fields_ = [ + ("stype", ur_structure_type_t), ## [in] type of this structure, must be + ## ::UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES + ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure + ("addrModes", ur_sampler_addressing_mode_t * 3) ## [in] Specify the address mode of the sampler per dimension + ] + ############################################################################### ## @brief Describes an interop memory resource descriptor class ur_exp_interop_mem_desc_t(Structure): @@ -2231,6 +2274,21 @@ class ur_exp_interop_semaphore_desc_t(Structure): ("pNext", c_void_p) ## [in][optional] pointer to extension-specific structure ] +############################################################################### +## @brief Describes layered image properties +## +## @details +## - Specify these properties in ::urBindlessImagesUnsampledImageCreateExp +## or ::urBindlessImagesSampledImageCreateExp via ::ur_image_desc_t as +## part of a `pNext` chain. 
+class ur_exp_layered_image_properties_t(Structure): + _fields_ = [ + ("stype", ur_structure_type_t), ## [in] type of this structure, must be + ## ::UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES + ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure + ("numLayers", c_ulong) ## [in] number of layers the image should have + ] + ############################################################################### ## @brief The extension string which defines support for command-buffers which ## is returned when querying device extensions. @@ -2256,6 +2314,11 @@ class ur_exp_command_buffer_sync_point_t(c_ulong): class ur_exp_command_buffer_handle_t(c_void_p): pass +############################################################################### +## @brief The extension string which defines support for cooperative-kernels +## which is returned when querying device extensions. +UR_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP = "ur_exp_cooperative_kernels" + ############################################################################### ## @brief The extension string which defines support for test ## which is returned when querying device extensions. 
@@ -2735,6 +2798,21 @@ class ur_kernel_dditable_t(Structure): ("pfnSetSpecializationConstants", c_void_p) ## _urKernelSetSpecializationConstants_t ] +############################################################################### +## @brief Function-pointer for urKernelSuggestMaxCooperativeGroupCountExp +if __use_win_types: + _urKernelSuggestMaxCooperativeGroupCountExp_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, POINTER(c_ulong) ) +else: + _urKernelSuggestMaxCooperativeGroupCountExp_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, POINTER(c_ulong) ) + + +############################################################################### +## @brief Table of KernelExp functions pointers +class ur_kernel_exp_dditable_t(Structure): + _fields_ = [ + ("pfnSuggestMaxCooperativeGroupCountExp", c_void_p) ## _urKernelSuggestMaxCooperativeGroupCountExp_t + ] + ############################################################################### ## @brief Function-pointer for urSamplerCreate if __use_win_types: @@ -2908,6 +2986,53 @@ class ur_physical_mem_dditable_t(Structure): ("pfnRelease", c_void_p) ## _urPhysicalMemRelease_t ] +############################################################################### +## @brief Function-pointer for urAdapterGet +if __use_win_types: + _urAdapterGet_t = WINFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_adapter_handle_t), POINTER(c_ulong) ) +else: + _urAdapterGet_t = CFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_adapter_handle_t), POINTER(c_ulong) ) + +############################################################################### +## @brief Function-pointer for urAdapterRelease +if __use_win_types: + _urAdapterRelease_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t ) +else: + _urAdapterRelease_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t ) + +############################################################################### +## @brief Function-pointer for urAdapterRetain +if __use_win_types: + _urAdapterRetain_t = WINFUNCTYPE( ur_result_t, 
ur_adapter_handle_t ) +else: + _urAdapterRetain_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t ) + +############################################################################### +## @brief Function-pointer for urAdapterGetLastError +if __use_win_types: + _urAdapterGetLastError_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t, POINTER(c_char_p), POINTER(c_long) ) +else: + _urAdapterGetLastError_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t, POINTER(c_char_p), POINTER(c_long) ) + +############################################################################### +## @brief Function-pointer for urAdapterGetInfo +if __use_win_types: + _urAdapterGetInfo_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t, ur_adapter_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) +else: + _urAdapterGetInfo_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t, ur_adapter_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) + + +############################################################################### +## @brief Table of Global functions pointers +class ur_global_dditable_t(Structure): + _fields_ = [ + ("pfnAdapterGet", c_void_p), ## _urAdapterGet_t + ("pfnAdapterRelease", c_void_p), ## _urAdapterRelease_t + ("pfnAdapterRetain", c_void_p), ## _urAdapterRetain_t + ("pfnAdapterGetLastError", c_void_p), ## _urAdapterGetLastError_t + ("pfnAdapterGetInfo", c_void_p) ## _urAdapterGetInfo_t + ] + ############################################################################### ## @brief Function-pointer for urEnqueueKernelLaunch if __use_win_types: @@ -3115,6 +3240,21 @@ class ur_enqueue_dditable_t(Structure): ("pfnWriteHostPipe", c_void_p) ## _urEnqueueWriteHostPipe_t ] +############################################################################### +## @brief Function-pointer for urEnqueueCooperativeKernelLaunchExp +if __use_win_types: + _urEnqueueCooperativeKernelLaunchExp_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), 
POINTER(c_size_t), c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) +else: + _urEnqueueCooperativeKernelLaunchExp_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) + + +############################################################################### +## @brief Table of EnqueueExp functions pointers +class ur_enqueue_exp_dditable_t(Structure): + _fields_ = [ + ("pfnCooperativeKernelLaunchExp", c_void_p) ## _urEnqueueCooperativeKernelLaunchExp_t + ] + ############################################################################### ## @brief Function-pointer for urQueueGetInfo if __use_win_types: @@ -3475,53 +3615,81 @@ class ur_usm_exp_dditable_t(Structure): _urCommandBufferAppendKernelLaunchExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemcpyUSMExp +## @brief Function-pointer for urCommandBufferAppendUSMMemcpyExp +if __use_win_types: + _urCommandBufferAppendUSMMemcpyExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) +else: + _urCommandBufferAppendUSMMemcpyExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + +############################################################################### +## @brief Function-pointer for urCommandBufferAppendUSMFillExp if __use_win_types: - 
_urCommandBufferAppendMemcpyUSMExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendUSMFillExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) else: - _urCommandBufferAppendMemcpyUSMExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendUSMFillExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### -## @brief Function-pointer for urCommandBufferAppendMembufferCopyExp +## @brief Function-pointer for urCommandBufferAppendMemBufferCopyExp if __use_win_types: - _urCommandBufferAppendMembufferCopyExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferCopyExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) else: - _urCommandBufferAppendMembufferCopyExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + 
_urCommandBufferAppendMemBufferCopyExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### -## @brief Function-pointer for urCommandBufferAppendMembufferWriteExp +## @brief Function-pointer for urCommandBufferAppendMemBufferWriteExp if __use_win_types: - _urCommandBufferAppendMembufferWriteExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferWriteExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) else: - _urCommandBufferAppendMembufferWriteExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferWriteExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### -## @brief Function-pointer for urCommandBufferAppendMembufferReadExp +## @brief Function-pointer for urCommandBufferAppendMemBufferReadExp if __use_win_types: - _urCommandBufferAppendMembufferReadExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) 
) + _urCommandBufferAppendMemBufferReadExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) else: - _urCommandBufferAppendMembufferReadExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferReadExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### -## @brief Function-pointer for urCommandBufferAppendMembufferCopyRectExp +## @brief Function-pointer for urCommandBufferAppendMemBufferCopyRectExp if __use_win_types: - _urCommandBufferAppendMembufferCopyRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferCopyRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) else: - _urCommandBufferAppendMembufferCopyRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), 
POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferCopyRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### -## @brief Function-pointer for urCommandBufferAppendMembufferWriteRectExp +## @brief Function-pointer for urCommandBufferAppendMemBufferWriteRectExp if __use_win_types: - _urCommandBufferAppendMembufferWriteRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferWriteRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) else: - _urCommandBufferAppendMembufferWriteRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferWriteRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) 
############################################################################### -## @brief Function-pointer for urCommandBufferAppendMembufferReadRectExp +## @brief Function-pointer for urCommandBufferAppendMemBufferReadRectExp if __use_win_types: - _urCommandBufferAppendMembufferReadRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferReadRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) else: - _urCommandBufferAppendMembufferReadRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferReadRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + +############################################################################### +## @brief Function-pointer for urCommandBufferAppendMemBufferFillExp +if __use_win_types: + _urCommandBufferAppendMemBufferFillExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) 
+else: + _urCommandBufferAppendMemBufferFillExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + +############################################################################### +## @brief Function-pointer for urCommandBufferAppendUSMPrefetchExp +if __use_win_types: + _urCommandBufferAppendUSMPrefetchExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) +else: + _urCommandBufferAppendUSMPrefetchExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + +############################################################################### +## @brief Function-pointer for urCommandBufferAppendUSMAdviseExp +if __use_win_types: + _urCommandBufferAppendUSMAdviseExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) +else: + _urCommandBufferAppendUSMAdviseExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### ## @brief Function-pointer for urCommandBufferEnqueueExp @@ -3540,13 +3708,17 @@ class ur_command_buffer_exp_dditable_t(Structure): ("pfnReleaseExp", c_void_p), ## _urCommandBufferReleaseExp_t ("pfnFinalizeExp", c_void_p), ## _urCommandBufferFinalizeExp_t ("pfnAppendKernelLaunchExp", c_void_p), ## 
_urCommandBufferAppendKernelLaunchExp_t - ("pfnAppendMemcpyUSMExp", c_void_p), ## _urCommandBufferAppendMemcpyUSMExp_t - ("pfnAppendMembufferCopyExp", c_void_p), ## _urCommandBufferAppendMembufferCopyExp_t - ("pfnAppendMembufferWriteExp", c_void_p), ## _urCommandBufferAppendMembufferWriteExp_t - ("pfnAppendMembufferReadExp", c_void_p), ## _urCommandBufferAppendMembufferReadExp_t - ("pfnAppendMembufferCopyRectExp", c_void_p), ## _urCommandBufferAppendMembufferCopyRectExp_t - ("pfnAppendMembufferWriteRectExp", c_void_p), ## _urCommandBufferAppendMembufferWriteRectExp_t - ("pfnAppendMembufferReadRectExp", c_void_p), ## _urCommandBufferAppendMembufferReadRectExp_t + ("pfnAppendUSMMemcpyExp", c_void_p), ## _urCommandBufferAppendUSMMemcpyExp_t + ("pfnAppendUSMFillExp", c_void_p), ## _urCommandBufferAppendUSMFillExp_t + ("pfnAppendMemBufferCopyExp", c_void_p), ## _urCommandBufferAppendMemBufferCopyExp_t + ("pfnAppendMemBufferWriteExp", c_void_p), ## _urCommandBufferAppendMemBufferWriteExp_t + ("pfnAppendMemBufferReadExp", c_void_p), ## _urCommandBufferAppendMemBufferReadExp_t + ("pfnAppendMemBufferCopyRectExp", c_void_p), ## _urCommandBufferAppendMemBufferCopyRectExp_t + ("pfnAppendMemBufferWriteRectExp", c_void_p), ## _urCommandBufferAppendMemBufferWriteRectExp_t + ("pfnAppendMemBufferReadRectExp", c_void_p), ## _urCommandBufferAppendMemBufferReadRectExp_t + ("pfnAppendMemBufferFillExp", c_void_p), ## _urCommandBufferAppendMemBufferFillExp_t + ("pfnAppendUSMPrefetchExp", c_void_p), ## _urCommandBufferAppendUSMPrefetchExp_t + ("pfnAppendUSMAdviseExp", c_void_p), ## _urCommandBufferAppendUSMAdviseExp_t ("pfnEnqueueExp", c_void_p) ## _urCommandBufferEnqueueExp_t ] @@ -3581,69 +3753,6 @@ class ur_usm_p2p_exp_dditable_t(Structure): ("pfnPeerAccessGetInfoExp", c_void_p) ## _urUsmP2PPeerAccessGetInfoExp_t ] -############################################################################### -## @brief Function-pointer for urInit -if __use_win_types: - _urInit_t = WINFUNCTYPE( 
ur_result_t, ur_device_init_flags_t, ur_loader_config_handle_t ) -else: - _urInit_t = CFUNCTYPE( ur_result_t, ur_device_init_flags_t, ur_loader_config_handle_t ) - -############################################################################### -## @brief Function-pointer for urTearDown -if __use_win_types: - _urTearDown_t = WINFUNCTYPE( ur_result_t, c_void_p ) -else: - _urTearDown_t = CFUNCTYPE( ur_result_t, c_void_p ) - -############################################################################### -## @brief Function-pointer for urAdapterGet -if __use_win_types: - _urAdapterGet_t = WINFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_adapter_handle_t), POINTER(c_ulong) ) -else: - _urAdapterGet_t = CFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_adapter_handle_t), POINTER(c_ulong) ) - -############################################################################### -## @brief Function-pointer for urAdapterRelease -if __use_win_types: - _urAdapterRelease_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t ) -else: - _urAdapterRelease_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t ) - -############################################################################### -## @brief Function-pointer for urAdapterRetain -if __use_win_types: - _urAdapterRetain_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t ) -else: - _urAdapterRetain_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t ) - -############################################################################### -## @brief Function-pointer for urAdapterGetLastError -if __use_win_types: - _urAdapterGetLastError_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t, POINTER(c_char_p), POINTER(c_long) ) -else: - _urAdapterGetLastError_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t, POINTER(c_char_p), POINTER(c_long) ) - -############################################################################### -## @brief Function-pointer for urAdapterGetInfo -if __use_win_types: - _urAdapterGetInfo_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t, 
ur_adapter_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urAdapterGetInfo_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t, ur_adapter_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - - -############################################################################### -## @brief Table of Global functions pointers -class ur_global_dditable_t(Structure): - _fields_ = [ - ("pfnInit", c_void_p), ## _urInit_t - ("pfnTearDown", c_void_p), ## _urTearDown_t - ("pfnAdapterGet", c_void_p), ## _urAdapterGet_t - ("pfnAdapterRelease", c_void_p), ## _urAdapterRelease_t - ("pfnAdapterRetain", c_void_p), ## _urAdapterRetain_t - ("pfnAdapterGetLastError", c_void_p), ## _urAdapterGetLastError_t - ("pfnAdapterGetInfo", c_void_p) ## _urAdapterGetInfo_t - ] - ############################################################################### ## @brief Function-pointer for urVirtualMemGranularityGetInfo if __use_win_types: @@ -3795,17 +3904,19 @@ class ur_dditable_t(Structure): ("Program", ur_program_dditable_t), ("ProgramExp", ur_program_exp_dditable_t), ("Kernel", ur_kernel_dditable_t), + ("KernelExp", ur_kernel_exp_dditable_t), ("Sampler", ur_sampler_dditable_t), ("Mem", ur_mem_dditable_t), ("PhysicalMem", ur_physical_mem_dditable_t), + ("Global", ur_global_dditable_t), ("Enqueue", ur_enqueue_dditable_t), + ("EnqueueExp", ur_enqueue_exp_dditable_t), ("Queue", ur_queue_dditable_t), ("BindlessImagesExp", ur_bindless_images_exp_dditable_t), ("USM", ur_usm_dditable_t), ("USMExp", ur_usm_exp_dditable_t), ("CommandBufferExp", ur_command_buffer_exp_dditable_t), ("UsmP2PExp", ur_usm_p2p_exp_dditable_t), - ("Global", ur_global_dditable_t), ("VirtualMem", ur_virtual_mem_dditable_t), ("Device", ur_device_dditable_t) ] @@ -3824,7 +3935,7 @@ def __init__(self, version : ur_api_version_t): self.__dditable = ur_dditable_t() # initialize the UR - self.__dll.urInit(0, 0) + self.__dll.urLoaderInit(0, 0) # call driver to get function pointers Platform = ur_platform_dditable_t() @@ -3932,6 
+4043,16 @@ def __init__(self, version : ur_api_version_t): self.urKernelSetArgMemObj = _urKernelSetArgMemObj_t(self.__dditable.Kernel.pfnSetArgMemObj) self.urKernelSetSpecializationConstants = _urKernelSetSpecializationConstants_t(self.__dditable.Kernel.pfnSetSpecializationConstants) + # call driver to get function pointers + KernelExp = ur_kernel_exp_dditable_t() + r = ur_result_v(self.__dll.urGetKernelExpProcAddrTable(version, byref(KernelExp))) + if r != ur_result_v.SUCCESS: + raise Exception(r) + self.__dditable.KernelExp = KernelExp + + # attach function interface to function address + self.urKernelSuggestMaxCooperativeGroupCountExp = _urKernelSuggestMaxCooperativeGroupCountExp_t(self.__dditable.KernelExp.pfnSuggestMaxCooperativeGroupCountExp) + # call driver to get function pointers Sampler = ur_sampler_dditable_t() r = ur_result_v(self.__dll.urGetSamplerProcAddrTable(version, byref(Sampler))) @@ -3978,6 +4099,20 @@ def __init__(self, version : ur_api_version_t): self.urPhysicalMemRetain = _urPhysicalMemRetain_t(self.__dditable.PhysicalMem.pfnRetain) self.urPhysicalMemRelease = _urPhysicalMemRelease_t(self.__dditable.PhysicalMem.pfnRelease) + # call driver to get function pointers + Global = ur_global_dditable_t() + r = ur_result_v(self.__dll.urGetGlobalProcAddrTable(version, byref(Global))) + if r != ur_result_v.SUCCESS: + raise Exception(r) + self.__dditable.Global = Global + + # attach function interface to function address + self.urAdapterGet = _urAdapterGet_t(self.__dditable.Global.pfnAdapterGet) + self.urAdapterRelease = _urAdapterRelease_t(self.__dditable.Global.pfnAdapterRelease) + self.urAdapterRetain = _urAdapterRetain_t(self.__dditable.Global.pfnAdapterRetain) + self.urAdapterGetLastError = _urAdapterGetLastError_t(self.__dditable.Global.pfnAdapterGetLastError) + self.urAdapterGetInfo = _urAdapterGetInfo_t(self.__dditable.Global.pfnAdapterGetInfo) + # call driver to get function pointers Enqueue = ur_enqueue_dditable_t() r = 
ur_result_v(self.__dll.urGetEnqueueProcAddrTable(version, byref(Enqueue))) @@ -4012,6 +4147,16 @@ def __init__(self, version : ur_api_version_t): self.urEnqueueReadHostPipe = _urEnqueueReadHostPipe_t(self.__dditable.Enqueue.pfnReadHostPipe) self.urEnqueueWriteHostPipe = _urEnqueueWriteHostPipe_t(self.__dditable.Enqueue.pfnWriteHostPipe) + # call driver to get function pointers + EnqueueExp = ur_enqueue_exp_dditable_t() + r = ur_result_v(self.__dll.urGetEnqueueExpProcAddrTable(version, byref(EnqueueExp))) + if r != ur_result_v.SUCCESS: + raise Exception(r) + self.__dditable.EnqueueExp = EnqueueExp + + # attach function interface to function address + self.urEnqueueCooperativeKernelLaunchExp = _urEnqueueCooperativeKernelLaunchExp_t(self.__dditable.EnqueueExp.pfnCooperativeKernelLaunchExp) + # call driver to get function pointers Queue = ur_queue_dditable_t() r = ur_result_v(self.__dll.urGetQueueProcAddrTable(version, byref(Queue))) @@ -4098,13 +4243,17 @@ def __init__(self, version : ur_api_version_t): self.urCommandBufferReleaseExp = _urCommandBufferReleaseExp_t(self.__dditable.CommandBufferExp.pfnReleaseExp) self.urCommandBufferFinalizeExp = _urCommandBufferFinalizeExp_t(self.__dditable.CommandBufferExp.pfnFinalizeExp) self.urCommandBufferAppendKernelLaunchExp = _urCommandBufferAppendKernelLaunchExp_t(self.__dditable.CommandBufferExp.pfnAppendKernelLaunchExp) - self.urCommandBufferAppendMemcpyUSMExp = _urCommandBufferAppendMemcpyUSMExp_t(self.__dditable.CommandBufferExp.pfnAppendMemcpyUSMExp) - self.urCommandBufferAppendMembufferCopyExp = _urCommandBufferAppendMembufferCopyExp_t(self.__dditable.CommandBufferExp.pfnAppendMembufferCopyExp) - self.urCommandBufferAppendMembufferWriteExp = _urCommandBufferAppendMembufferWriteExp_t(self.__dditable.CommandBufferExp.pfnAppendMembufferWriteExp) - self.urCommandBufferAppendMembufferReadExp = _urCommandBufferAppendMembufferReadExp_t(self.__dditable.CommandBufferExp.pfnAppendMembufferReadExp) - 
self.urCommandBufferAppendMembufferCopyRectExp = _urCommandBufferAppendMembufferCopyRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMembufferCopyRectExp) - self.urCommandBufferAppendMembufferWriteRectExp = _urCommandBufferAppendMembufferWriteRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMembufferWriteRectExp) - self.urCommandBufferAppendMembufferReadRectExp = _urCommandBufferAppendMembufferReadRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMembufferReadRectExp) + self.urCommandBufferAppendUSMMemcpyExp = _urCommandBufferAppendUSMMemcpyExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMMemcpyExp) + self.urCommandBufferAppendUSMFillExp = _urCommandBufferAppendUSMFillExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMFillExp) + self.urCommandBufferAppendMemBufferCopyExp = _urCommandBufferAppendMemBufferCopyExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferCopyExp) + self.urCommandBufferAppendMemBufferWriteExp = _urCommandBufferAppendMemBufferWriteExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferWriteExp) + self.urCommandBufferAppendMemBufferReadExp = _urCommandBufferAppendMemBufferReadExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferReadExp) + self.urCommandBufferAppendMemBufferCopyRectExp = _urCommandBufferAppendMemBufferCopyRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferCopyRectExp) + self.urCommandBufferAppendMemBufferWriteRectExp = _urCommandBufferAppendMemBufferWriteRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferWriteRectExp) + self.urCommandBufferAppendMemBufferReadRectExp = _urCommandBufferAppendMemBufferReadRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferReadRectExp) + self.urCommandBufferAppendMemBufferFillExp = _urCommandBufferAppendMemBufferFillExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferFillExp) + self.urCommandBufferAppendUSMPrefetchExp = _urCommandBufferAppendUSMPrefetchExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMPrefetchExp) + 
self.urCommandBufferAppendUSMAdviseExp = _urCommandBufferAppendUSMAdviseExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMAdviseExp) self.urCommandBufferEnqueueExp = _urCommandBufferEnqueueExp_t(self.__dditable.CommandBufferExp.pfnEnqueueExp) # call driver to get function pointers @@ -4119,22 +4268,6 @@ def __init__(self, version : ur_api_version_t): self.urUsmP2PDisablePeerAccessExp = _urUsmP2PDisablePeerAccessExp_t(self.__dditable.UsmP2PExp.pfnDisablePeerAccessExp) self.urUsmP2PPeerAccessGetInfoExp = _urUsmP2PPeerAccessGetInfoExp_t(self.__dditable.UsmP2PExp.pfnPeerAccessGetInfoExp) - # call driver to get function pointers - Global = ur_global_dditable_t() - r = ur_result_v(self.__dll.urGetGlobalProcAddrTable(version, byref(Global))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Global = Global - - # attach function interface to function address - self.urInit = _urInit_t(self.__dditable.Global.pfnInit) - self.urTearDown = _urTearDown_t(self.__dditable.Global.pfnTearDown) - self.urAdapterGet = _urAdapterGet_t(self.__dditable.Global.pfnAdapterGet) - self.urAdapterRelease = _urAdapterRelease_t(self.__dditable.Global.pfnAdapterRelease) - self.urAdapterRetain = _urAdapterRetain_t(self.__dditable.Global.pfnAdapterRetain) - self.urAdapterGetLastError = _urAdapterGetLastError_t(self.__dditable.Global.pfnAdapterGetLastError) - self.urAdapterGetInfo = _urAdapterGetInfo_t(self.__dditable.Global.pfnAdapterGetInfo) - # call driver to get function pointers VirtualMem = ur_virtual_mem_dditable_t() r = ur_result_v(self.__dll.urGetVirtualMemProcAddrTable(version, byref(VirtualMem))) diff --git a/include/ur_api.h b/include/ur_api.h index 11a0c697d2..63f5fc8083 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -7,7 +7,7 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * * @file ur_api.h - * @version v0.7-r0 + * @version v0.9-r0 * */ #ifndef UR_API_H_INCLUDED @@ -126,8 +126,6 @@ typedef enum ur_function_t { 
UR_FUNCTION_QUEUE_CREATE_WITH_NATIVE_HANDLE = 96, ///< Enumerator for ::urQueueCreateWithNativeHandle UR_FUNCTION_QUEUE_FINISH = 97, ///< Enumerator for ::urQueueFinish UR_FUNCTION_QUEUE_FLUSH = 98, ///< Enumerator for ::urQueueFlush - UR_FUNCTION_INIT = 99, ///< Enumerator for ::urInit - UR_FUNCTION_TEAR_DOWN = 100, ///< Enumerator for ::urTearDown UR_FUNCTION_SAMPLER_CREATE = 101, ///< Enumerator for ::urSamplerCreate UR_FUNCTION_SAMPLER_RETAIN = 102, ///< Enumerator for ::urSamplerRetain UR_FUNCTION_SAMPLER_RELEASE = 103, ///< Enumerator for ::urSamplerRelease @@ -153,9 +151,6 @@ typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_FINALIZE_EXP = 123, ///< Enumerator for ::urCommandBufferFinalizeExp UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP = 125, ///< Enumerator for ::urCommandBufferAppendKernelLaunchExp UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP = 128, ///< Enumerator for ::urCommandBufferEnqueueExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP = 129, ///< Enumerator for ::urCommandBufferAppendMemcpyUSMExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP = 130, ///< Enumerator for ::urCommandBufferAppendMembufferCopyExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP = 131, ///< Enumerator for ::urCommandBufferAppendMembufferCopyRectExp UR_FUNCTION_USM_PITCHED_ALLOC_EXP = 132, ///< Enumerator for ::urUSMPitchedAllocExp UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP = 133, ///< Enumerator for ::urBindlessImagesUnsampledImageHandleDestroyExp UR_FUNCTION_BINDLESS_IMAGES_SAMPLED_IMAGE_HANDLE_DESTROY_EXP = 134, ///< Enumerator for ::urBindlessImagesSampledImageHandleDestroyExp @@ -191,10 +186,6 @@ typedef enum ur_function_t { UR_FUNCTION_USM_P2P_ENABLE_PEER_ACCESS_EXP = 165, ///< Enumerator for ::urUsmP2PEnablePeerAccessExp UR_FUNCTION_USM_P2P_DISABLE_PEER_ACCESS_EXP = 166, ///< Enumerator for ::urUsmP2PDisablePeerAccessExp UR_FUNCTION_USM_P2P_PEER_ACCESS_GET_INFO_EXP = 167, ///< Enumerator for 
::urUsmP2PPeerAccessGetInfoExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP = 168, ///< Enumerator for ::urCommandBufferAppendMembufferWriteExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP = 169, ///< Enumerator for ::urCommandBufferAppendMembufferReadExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP = 170, ///< Enumerator for ::urCommandBufferAppendMembufferWriteRectExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP = 171, ///< Enumerator for ::urCommandBufferAppendMembufferReadRectExp UR_FUNCTION_LOADER_CONFIG_CREATE = 172, ///< Enumerator for ::urLoaderConfigCreate UR_FUNCTION_LOADER_CONFIG_RELEASE = 173, ///< Enumerator for ::urLoaderConfigRelease UR_FUNCTION_LOADER_CONFIG_RETAIN = 174, ///< Enumerator for ::urLoaderConfigRetain @@ -208,6 +199,22 @@ typedef enum ur_function_t { UR_FUNCTION_PROGRAM_BUILD_EXP = 197, ///< Enumerator for ::urProgramBuildExp UR_FUNCTION_PROGRAM_COMPILE_EXP = 198, ///< Enumerator for ::urProgramCompileExp UR_FUNCTION_PROGRAM_LINK_EXP = 199, ///< Enumerator for ::urProgramLinkExp + UR_FUNCTION_LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK = 200, ///< Enumerator for ::urLoaderConfigSetCodeLocationCallback + UR_FUNCTION_LOADER_INIT = 201, ///< Enumerator for ::urLoaderInit + UR_FUNCTION_LOADER_TEAR_DOWN = 202, ///< Enumerator for ::urLoaderTearDown + UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP = 203, ///< Enumerator for ::urCommandBufferAppendUSMMemcpyExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP = 204, ///< Enumerator for ::urCommandBufferAppendUSMFillExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP = 205, ///< Enumerator for ::urCommandBufferAppendMemBufferCopyExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP = 206, ///< Enumerator for ::urCommandBufferAppendMemBufferWriteExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP = 207, ///< Enumerator for ::urCommandBufferAppendMemBufferReadExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP = 
208, ///< Enumerator for ::urCommandBufferAppendMemBufferCopyRectExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP = 209, ///< Enumerator for ::urCommandBufferAppendMemBufferWriteRectExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP = 210, ///< Enumerator for ::urCommandBufferAppendMemBufferReadRectExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP = 211, ///< Enumerator for ::urCommandBufferAppendMemBufferFillExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP = 212, ///< Enumerator for ::urCommandBufferAppendUSMPrefetchExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP = 213, ///< Enumerator for ::urCommandBufferAppendUSMAdviseExp + UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP = 214, ///< Enumerator for ::urEnqueueCooperativeKernelLaunchExp + UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP = 215, ///< Enumerator for ::urKernelSuggestMaxCooperativeGroupCountExp /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -217,46 +224,48 @@ typedef enum ur_function_t { /////////////////////////////////////////////////////////////////////////////// /// @brief Defines structure types typedef enum ur_structure_type_t { - UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t - UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t - UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t - UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t - UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t - UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t - UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t - UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t - UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t - UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t - UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t - 
UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t - UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t - UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t - UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t - UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t - UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t - UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t - UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t - UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t - UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t - UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t - UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t - UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t - UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t - UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t - UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t - 
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t - UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t - UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t - UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t - UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t + UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t + UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t + UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t + UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t + UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t + UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t + UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t + UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t + UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t + UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t + UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t + UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t + UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t + UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t + UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t + UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t + UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t + UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t + UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< 
::ur_mem_native_properties_t + UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t + UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t + UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t + UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t + UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t + UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t + UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t + UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t + UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t + UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t + UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t + UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES = 0x2005, ///< ::ur_exp_layered_image_properties_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2006, ///< 
::ur_exp_sampler_addr_modes_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -518,9 +527,9 @@ typedef struct ur_rect_region_t { #if !defined(__GNUC__) #pragma endregion #endif -// Intel 'oneAPI' Unified Runtime APIs for Runtime +// Intel 'oneAPI' Unified Runtime APIs for Loader #if !defined(__GNUC__) -#pragma region runtime +#pragma region loader #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Supported device initialization flags @@ -671,21 +680,64 @@ urLoaderConfigEnableLayer( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Initialize the 'oneAPI' adapter(s) +/// @brief Code location data +typedef struct ur_code_location_t { + const char *functionName; ///< [in][out] Function name. + const char *sourceFile; ///< [in][out] Source code file. + uint32_t lineNumber; ///< [in][out] Source code line number. + uint32_t columnNumber; ///< [in][out] Source code column number. + +} ur_code_location_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Code location callback with user data. +typedef ur_code_location_t (*ur_code_location_callback_t)( + void *pUserData ///< [in][out] pointer to data to be passed to callback +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Set a function callback for use by the loader to retrieve code +/// location information. +/// +/// @details +/// - The code location callback is optional and provides additional +/// information to the tracing layer about the entry point of the current +/// execution flow. +/// - This functionality can be used to match traced unified runtime +/// function calls with higher-level user calls. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hLoaderConfig` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pfnCodeloc` +UR_APIEXPORT ur_result_t UR_APICALL +urLoaderConfigSetCodeLocationCallback( + ur_loader_config_handle_t hLoaderConfig, ///< [in] Handle to config object the layer will be enabled for. + ur_code_location_callback_t pfnCodeloc, ///< [in] Function pointer to code location callback. + void *pUserData ///< [in][out][optional] pointer to data to be passed to callback. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Initialize the 'oneAPI' loader /// /// @details /// - The application must call this function before calling any other /// function. /// - If this function is not called then all other functions will return /// ::UR_RESULT_ERROR_UNINITIALIZED. -/// - Only one instance of each adapter will be initialized per process. +/// - Only one instance of the loader will be initialized per process. /// - The application may call this function multiple times with different /// flags or environment variables enabled. /// - The application must call this function after forking new processes. /// Each forked process must call this function. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe for scenarios -/// where multiple libraries may initialize the adapter(s) simultaneously. +/// where multiple libraries may initialize the loader simultaneously. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -696,28 +748,32 @@ urLoaderConfigEnableLayer( /// + `::UR_DEVICE_INIT_FLAGS_MASK & device_flags` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY UR_APIEXPORT ur_result_t UR_APICALL -urInit( +urLoaderInit( ur_device_init_flags_t device_flags, ///< [in] device initialization flags. ///< must be 0 (default) or a combination of ::ur_device_init_flag_t. ur_loader_config_handle_t hLoaderConfig ///< [in][optional] Handle of loader config handle. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Tear down the 'oneAPI' instance and release all its resources +/// @brief Tear down the 'oneAPI' loader and release all its resources /// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED /// - ::UR_RESULT_ERROR_DEVICE_LOST /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pParams` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY UR_APIEXPORT ur_result_t UR_APICALL -urTearDown( - void *pParams ///< [in] pointer to tear down parameters -); +urLoaderTearDown( + void); +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Unified Runtime APIs for Adapter +#if !defined(__GNUC__) +#pragma region adapter +#endif /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves all available adapters /// @@ -756,7 +812,9 @@ urAdapterGet( /// /// @details /// - When the reference count of the adapter reaches zero, the adapter may -/// perform adapter-specififc resource teardown +/// perform adapter-specific resource teardown.
Resources must be left in +/// a state where it is safe for the adapter to be subsequently reinitialized +/// with ::urAdapterGet /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -937,6 +995,7 @@ typedef enum ur_adapter_backend_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phAdapters` /// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `NumEntries == 0 && phPlatforms != NULL` UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( ur_adapter_handle_t *phAdapters, ///< [in][range(0, NumAdapters)] array of adapters to query for platforms. @@ -1025,7 +1084,9 @@ urPlatformGetInfo( typedef enum ur_api_version_t { UR_API_VERSION_0_6 = UR_MAKE_VERSION(0, 6), ///< version 0.6 UR_API_VERSION_0_7 = UR_MAKE_VERSION(0, 7), ///< version 0.7 - UR_API_VERSION_CURRENT = UR_MAKE_VERSION(0, 7), ///< latest known version + UR_API_VERSION_0_8 = UR_MAKE_VERSION(0, 8), ///< version 0.8 + UR_API_VERSION_0_9 = UR_MAKE_VERSION(0, 9), ///< version 0.9 + UR_API_VERSION_CURRENT = UR_MAKE_VERSION(0, 9), ///< latest known version /// @cond UR_API_VERSION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1295,14 +1356,18 @@ typedef enum ur_device_type_t { /// + `NULL == hPlatform` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_DEVICE_TYPE_VPU < DeviceType` +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `NumEntries == 0 && phDevices != NULL` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NumEntries > 0 && phDevices == NULL` /// - ::UR_RESULT_ERROR_INVALID_VALUE UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( ur_platform_handle_t hPlatform, ///< [in] handle of the platform instance ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices. - ///< If phDevices in not NULL then NumEntries should be greater than zero, - ///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE, + ///< If phDevices is not NULL, then NumEntries should be greater than zero.
+ ///< Otherwise ::UR_RESULT_ERROR_INVALID_SIZE ///< will be returned. ur_device_handle_t *phDevices, ///< [out][optional][range(0, NumEntries)] array of handle of devices. ///< If NumEntries is less than the number of devices available, then @@ -1708,6 +1773,7 @@ typedef struct ur_device_partition_properties_t { /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pProperties` +/// + `NULL == pProperties->pProperties` /// - ::UR_RESULT_ERROR_DEVICE_PARTITION_FAILED /// - ::UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT UR_APIEXPORT ur_result_t UR_APICALL @@ -2020,6 +2086,8 @@ typedef struct ur_context_properties_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phDevices` /// + `NULL == phContext` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_CONTEXT_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY UR_APIEXPORT ur_result_t UR_APICALL @@ -3264,6 +3332,8 @@ typedef struct ur_usm_pool_limits_desc_t { /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -3308,6 +3378,8 @@ urUSMHostAlloc( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -3354,6 +3426,8 @@ urUSMDeviceAlloc( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & 
pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -3796,6 +3870,8 @@ typedef struct ur_physical_mem_properties_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_PHYSICAL_MEM_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phPhysicalMem` /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -4873,6 +4949,8 @@ typedef struct ur_kernel_arg_mem_obj_properties_t { /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_MEM_FLAGS_MASK & pProperties->memoryAccess` /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( @@ -5126,12 +5204,15 @@ typedef struct ur_queue_index_properties_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_QUEUE_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phQueue` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_DEVICE -/// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES +/// + `pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_HIGH && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_LOW` +/// + `pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_BATCHED && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -5491,6 +5572,8 @@ urEventGetInfo( /// + `NULL == hEvent` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + 
`::UR_PROFILING_INFO_COMMAND_COMPLETE < propName` +/// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE +/// + If `hEvent`'s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`. /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `pPropValue && propSize == 0` /// - ::UR_RESULT_ERROR_INVALID_EVENT @@ -5652,11 +5735,11 @@ urEventCreateWithNativeHandle( /////////////////////////////////////////////////////////////////////////////// /// @brief Event states for all events. typedef enum ur_execution_info_t { - UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE = 0, ///< Indicates that the event has completed. - UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING = 1, ///< Indicates that the device has started processing this event. - UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED = 2, ///< Indicates that the event has been submitted by the host to the device. - UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED = 3, ///< Indicates that the event has been queued, this is the initial state of - ///< events. + UR_EXECUTION_INFO_COMPLETE = 0, ///< Indicates that the event has completed. + UR_EXECUTION_INFO_RUNNING = 1, ///< Indicates that the device has started processing this event. + UR_EXECUTION_INFO_SUBMITTED = 2, ///< Indicates that the event has been submitted by the host to the device. + UR_EXECUTION_INFO_QUEUED = 3, ///< Indicates that the event has been queued, this is the initial state of + ///< events. /// @cond UR_EXECUTION_INFO_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -5679,6 +5762,8 @@ typedef void (*ur_event_callback_t)( /// - The registered callback function will be called when the execution /// status of command associated with event changes to an execution status /// equal to or past the status specified by command_exec_status. +/// - `execStatus` must not be `UR_EXECUTION_INFO_QUEUED` as this is the +/// initial state of all events. /// - The application may call this function from simultaneous threads for /// the same context.
/// - The implementation of this function should be thread-safe. @@ -5691,9 +5776,11 @@ typedef void (*ur_event_callback_t)( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hEvent` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED < execStatus` +/// + `::UR_EXECUTION_INFO_QUEUED < execStatus` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pfnNotify` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + `execStatus == UR_EXECUTION_INFO_QUEUED` UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback( ur_event_handle_t hEvent, ///< [in] handle of the event object @@ -5885,7 +5972,7 @@ urEnqueueEventsWaitWithBarrier( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -5934,7 +6021,7 @@ urEnqueueMemBufferRead( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being written @@ -5993,7 +6080,7 @@ urEnqueueMemBufferWrite( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of 
the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -6059,7 +6146,7 @@ urEnqueueMemBufferReadRect( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -6112,8 +6199,8 @@ urEnqueueMemBufferWriteRect( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the src buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_mem_handle_t hBufferSrc, ///< [in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object size_t srcOffset, ///< [in] offset into hBufferSrc to begin copying from size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into size_t size, ///< [in] size in bytes of data being copied @@ -6165,8 +6252,8 @@ urEnqueueMemBufferCopy( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the source buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_mem_handle_t hBufferSrc, ///< [in][bounds(srcOrigin, region)] handle of the source buffer object + ur_mem_handle_t hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the 
dest buffer object ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer ur_rect_region_t region, ///< [in] source 3D rectangular region descriptor: width, height, depth @@ -6209,13 +6296,18 @@ urEnqueueMemBufferCopyRect( /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `patternSize == 0 || size == 0` +/// + `patternSize > size` +/// + `(patternSize & (patternSize - 1)) != 0` +/// + `size % patternSize != 0` +/// + `offset % patternSize != 0` /// + If `offset + size` results in an out-of-bounds access. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object const void *pPattern, ///< [in] pointer to the fill pattern size_t patternSize, ///< [in] size in bytes of the pattern size_t offset, ///< [in] offset into the buffer @@ -6258,12 +6350,14 @@ urEnqueueMemBufferFill( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image ur_rect_region_t region, ///< [in] defines the (width, height, depth) in pixels of the 1D, 2D, or 3D @@ -6309,12 +6403,14 @@ urEnqueueMemImageRead( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image ur_rect_region_t region, ///< [in] defines the (width, height, depth) in pixels of the 1D, 2D, or 3D @@ -6354,13 +6450,15 @@ urEnqueueMemImageWrite( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImageSrc, ///< [in] handle of the src image object - ur_mem_handle_t hImageDst, ///< [in] handle of the dest image object + ur_mem_handle_t hImageSrc, ///< [in][bounds(srcOrigin, region)] handle of the src image object + ur_mem_handle_t hImageDst, ///< [in][bounds(dstOrigin, region)] handle of the dest image object ur_rect_offset_t srcOrigin, ///< [in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D ///< image ur_rect_offset_t dstOrigin, ///< [in] defines the (x,y,z) offset in pixels in the destination 1D, 2D, @@ -6445,7 +6543,7 @@ typedef enum ur_usm_migration_flag_t { UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t mapFlags, ///< [in] flags for read, write, readwrite mapping size_t offset, ///< [in] offset in bytes of the buffer region being mapped @@ -6513,7 +6611,7 @@ urEnqueueMemUnmap( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hQueue` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == ptr` +/// + `NULL == pMem` /// + `NULL == pPattern` /// - ::UR_RESULT_ERROR_INVALID_QUEUE /// - ::UR_RESULT_ERROR_INVALID_EVENT @@ -6533,7 +6631,7 @@ urEnqueueMemUnmap( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - void *ptr, ///< [in] pointer to USM memory object + void *pMem, ///< 
[in][bounds(0, size)] pointer to USM memory object size_t patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. const void *pPattern, ///< [in] pointer with the bytes of the pattern to set. @@ -6576,8 +6674,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object bool blocking, ///< [in] blocking or non-blocking copy - void *pDst, ///< [in] pointer to the destination USM memory object - const void *pSrc, ///< [in] pointer to the source USM memory object + void *pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void *pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object size_t size, ///< [in] size in bytes to be copied uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -6591,6 +6689,11 @@ urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to prefetch USM memory /// +/// @details +/// - Prefetching may not be supported for all devices or allocation types. +/// If memory prefetching is not supported, the prefetch hint will be +/// ignored. 
+/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED @@ -6617,7 +6720,7 @@ urEnqueueUSMMemcpy( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object + const void *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object size_t size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numEventsInWaitList, ///< [in] size of the event wait list @@ -6632,6 +6735,11 @@ urEnqueueUSMPrefetch( /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to set USM memory advice /// +/// @details +/// - Not all memory advice hints may be supported for all devices or +/// allocation types. If a memory advice hint is not supported, it will be +/// ignored. +/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED @@ -6654,7 +6762,7 @@ urEnqueueUSMPrefetch( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object + const void *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object size_t size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t advice, ///< [in] USM memory advice ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies this particular @@ -6695,7 +6803,7 @@ urEnqueueUSMAdvise( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. - void *pMem, ///< [in] pointer to memory to be filled. + void *pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. size_t pitch, ///< [in] the total width of the destination memory including padding. size_t patternSize, ///< [in] the size in bytes of the pattern. 
Must be a power of 2 and less ///< than or equal to width. @@ -6745,9 +6853,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. bool blocking, ///< [in] indicates if this operation should block the host. - void *pDst, ///< [in] pointer to memory where data will be copied. + void *pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. size_t dstPitch, ///< [in] the total width of the source memory including padding. - const void *pSrc, ///< [in] pointer to memory to be copied. + const void *pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. size_t srcPitch, ///< [in] the total width of the source memory including padding. size_t width, ///< [in] the width in bytes of each row to be copied. size_t height, ///< [in] the height of columns to be copied. @@ -6886,7 +6995,6 @@ urEnqueueReadHostPipe( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pipe_symbol` /// + `NULL == pSrc` -/// + `NULL == phEvent` /// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` @@ -6907,7 +7015,7 @@ urEnqueueWriteHostPipe( const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the host pipe write. ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. - ur_event_handle_t *phEvent ///< [out] returns an event object that identifies this write command + ur_event_handle_t *phEvent ///< [out][optional] returns an event object that identifies this write command ///< and can be used to query or queue a wait for this command to complete. 
); @@ -6989,6 +7097,20 @@ typedef struct ur_exp_sampler_mip_properties_t { } ur_exp_sampler_mip_properties_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Describes unique sampler addressing mode per dimension +/// +/// @details +/// - Specify these properties in ::urSamplerCreate via ::ur_sampler_desc_t +/// as part of a `pNext` chain. +typedef struct ur_exp_sampler_addr_modes_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES + void *pNext; ///< [in,out][optional] pointer to extension-specific structure + ur_sampler_addressing_mode_t addrModes[3]; ///< [in] Specify the address mode of the sampler per dimension + +} ur_exp_sampler_addr_modes_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Describes an interop memory resource descriptor typedef struct ur_exp_interop_mem_desc_t { @@ -7007,6 +7129,21 @@ typedef struct ur_exp_interop_semaphore_desc_t { } ur_exp_interop_semaphore_desc_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Describes layered image properties +/// +/// @details +/// - Specify these properties in ::urBindlessImagesUnsampledImageCreateExp +/// or ::urBindlessImagesSampledImageCreateExp via ::ur_image_desc_t as +/// part of a `pNext` chain. 
+typedef struct ur_exp_layered_image_properties_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES + void *pNext; ///< [in,out][optional] pointer to extension-specific structure + uint32_t numLayers; ///< [in] number of layers the image should have + +} ur_exp_layered_image_properties_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief USM allocate pitched memory /// @@ -7034,6 +7171,8 @@ typedef struct ur_exp_interop_semaphore_desc_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// + `NULL == pResultPitch` @@ -7773,7 +7912,7 @@ urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferAppendMemcpyUSMExp( +urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. 
@@ -7783,6 +7922,45 @@ urCommandBufferAppendMemcpyUSMExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM fill command to a command-buffer object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// + `NULL == pPattern` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `patternSize == 0 || size == 0` +/// + `patternSize > size` +/// + `(patternSize & (patternSize - 1)) != 0` +/// + `size % patternSize != 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + void *pMemory, ///< [in] pointer to USM allocated memory to fill. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. +); + /////////////////////////////////////////////////////////////////////////////// /// @brief Append a memory copy command to a command-buffer object /// @@ -7804,7 +7982,7 @@ urCommandBufferAppendMemcpyUSMExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferAppendMembufferCopyExp( +urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. @@ -7838,7 +8016,7 @@ urCommandBufferAppendMembufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferAppendMembufferWriteExp( +urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. size_t offset, ///< [in] offset in bytes in the buffer object. @@ -7871,7 +8049,7 @@ urCommandBufferAppendMembufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferAppendMembufferReadExp( +urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. size_t offset, ///< [in] offset in bytes in the buffer object. 
@@ -7903,7 +8081,7 @@ urCommandBufferAppendMembufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferAppendMembufferCopyRectExp( +urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. @@ -7941,7 +8119,7 @@ urCommandBufferAppendMembufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferAppendMembufferWriteRectExp( +urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. @@ -7982,7 +8160,7 @@ urCommandBufferAppendMembufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferAppendMembufferReadRectExp( +urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. 
@@ -8000,6 +8178,124 @@ urCommandBufferAppendMembufferReadRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a memory fill command to a command-buffer object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// + `NULL == hBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pPattern` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + If `offset + size` results in an out-of-bounds access. +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t offset, ///< [in] offset into the buffer. + size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
+ ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM Prefetch command to a command-buffer object +/// +/// @details +/// - Prefetching may not be supported for all devices or allocation types. +/// If memory prefetching is not supported, the prefetch hint will be +/// ignored. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_MIGRATION_FLAGS_MASK & flags` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `size == 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to USM allocated memory to prefetch. + size_t size, ///< [in] size in bytes to be fetched. + ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags + uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM Advise command to a command-buffer object +/// +/// @details +/// - Not all memory advice hints may be supported for all devices or +/// allocation types. If a memory advice hint is not supported, it will be +/// ignored. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_ADVICE_FLAGS_MASK & advice` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `size == 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to the USM memory object. + size_t size, ///< [in] size in bytes to be advised. 
+ ur_usm_advice_flags_t advice, ///< [in] USM memory advice + uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. +); + /////////////////////////////////////////////////////////////////////////////// /// @brief Submit a command-buffer for execution on a queue. /// @@ -8033,6 +8329,90 @@ urCommandBufferEnqueueExp( ///< command-buffer execution instance. ); +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Unified Runtime Experimental APIs for Cooperative Kernels +#if !defined(__GNUC__) +#pragma region cooperative kernels(experimental) +#endif +/////////////////////////////////////////////////////////////////////////////// +#ifndef UR_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP +/// @brief The extension string which defines support for cooperative-kernels +/// which is returned when querying device extensions. 
+#define UR_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP "ur_exp_cooperative_kernels" +#endif // UR_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue a command to execute a cooperative kernel +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pGlobalWorkOffset` +/// + `NULL == pGlobalWorkSize` +/// - ::UR_RESULT_ERROR_INVALID_QUEUE +/// - ::UR_RESULT_ERROR_INVALID_KERNEL +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and + ///< work-group work-items + const size_t *pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t *pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t *pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the number of local work-items forming a work-group that will + ///< execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. + ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. 
+); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Query the maximum number of work groups for a cooperative kernel +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pGroupCountRet` +/// - ::UR_RESULT_ERROR_INVALID_KERNEL +UR_APIEXPORT ur_result_t UR_APICALL +urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups +); + #if !defined(__GNUC__) #pragma endregion #endif @@ -8412,6 +8792,16 @@ typedef struct ur_loader_config_enable_layer_params_t { const char **ppLayerName; } ur_loader_config_enable_layer_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urLoaderConfigSetCodeLocationCallback +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_loader_config_set_code_location_callback_params_t { + ur_loader_config_handle_t *phLoaderConfig; + ur_code_location_callback_t *ppfnCodeloc; + void **ppUserData; +} ur_loader_config_set_code_location_callback_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urPlatformGet /// @details Each entry is a pointer to the parameter passed to the function; @@ -8962,6 +9352,15 @@ typedef struct ur_kernel_set_specialization_constants_params_t { const ur_specialization_constant_info_t **ppSpecConstants; } ur_kernel_set_specialization_constants_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function 
parameters for urKernelSuggestMaxCooperativeGroupCountExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_kernel_suggest_max_cooperative_group_count_exp_params_t { + ur_kernel_handle_t *phKernel; + uint32_t **ppGroupCountRet; +} ur_kernel_suggest_max_cooperative_group_count_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urSamplerCreate /// @details Each entry is a pointer to the parameter passed to the function; @@ -9158,6 +9557,54 @@ typedef struct ur_physical_mem_release_params_t { ur_physical_mem_handle_t *phPhysicalMem; } ur_physical_mem_release_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urAdapterGet +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_adapter_get_params_t { + uint32_t *pNumEntries; + ur_adapter_handle_t **pphAdapters; + uint32_t **ppNumAdapters; +} ur_adapter_get_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urAdapterRelease +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_adapter_release_params_t { + ur_adapter_handle_t *phAdapter; +} ur_adapter_release_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urAdapterRetain +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_adapter_retain_params_t { + ur_adapter_handle_t *phAdapter; +} 
ur_adapter_retain_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urAdapterGetLastError +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_adapter_get_last_error_params_t { + ur_adapter_handle_t *phAdapter; + const char ***pppMessage; + int32_t **ppError; +} ur_adapter_get_last_error_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urAdapterGetInfo +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_adapter_get_info_params_t { + ur_adapter_handle_t *phAdapter; + ur_adapter_info_t *ppropName; + size_t *ppropSize; + void **ppPropValue; + size_t **ppPropSizeRet; +} ur_adapter_get_info_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urEnqueueKernelLaunch /// @details Each entry is a pointer to the parameter passed to the function; @@ -9410,7 +9857,7 @@ typedef struct ur_enqueue_mem_unmap_params_t { /// allowing the callback the ability to modify the parameter's value typedef struct ur_enqueue_usm_fill_params_t { ur_queue_handle_t *phQueue; - void **pptr; + void **ppMem; size_t *ppatternSize; const void **ppPattern; size_t *psize; @@ -9561,6 +10008,22 @@ typedef struct ur_enqueue_write_host_pipe_params_t { ur_event_handle_t **pphEvent; } ur_enqueue_write_host_pipe_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueCooperativeKernelLaunchExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct 
ur_enqueue_cooperative_kernel_launch_exp_params_t { + ur_queue_handle_t *phQueue; + ur_kernel_handle_t *phKernel; + uint32_t *pworkDim; + const size_t **ppGlobalWorkOffset; + const size_t **ppGlobalWorkSize; + const size_t **ppLocalWorkSize; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + ur_event_handle_t **pphEvent; +} ur_enqueue_cooperative_kernel_launch_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urQueueGetInfo /// @details Each entry is a pointer to the parameter passed to the function; @@ -10027,10 +10490,10 @@ typedef struct ur_command_buffer_append_kernel_launch_exp_params_t { } ur_command_buffer_append_kernel_launch_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendMemcpyUSMExp +/// @brief Function parameters for urCommandBufferAppendUSMMemcpyExp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_memcpy_usm_exp_params_t { +typedef struct ur_command_buffer_append_usm_memcpy_exp_params_t { ur_exp_command_buffer_handle_t *phCommandBuffer; void **ppDst; const void **ppSrc; @@ -10038,13 +10501,28 @@ typedef struct ur_command_buffer_append_memcpy_usm_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_memcpy_usm_exp_params_t; +} ur_command_buffer_append_usm_memcpy_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendMembufferCopyExp +/// @brief Function parameters for urCommandBufferAppendUSMFillExp /// @details Each entry is a pointer to the parameter passed to the 
function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_membuffer_copy_exp_params_t { +typedef struct ur_command_buffer_append_usm_fill_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + void **ppMemory; + const void **ppPattern; + size_t *ppatternSize; + size_t *psize; + uint32_t *pnumSyncPointsInWaitList; + const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + ur_exp_command_buffer_sync_point_t **ppSyncPoint; +} ur_command_buffer_append_usm_fill_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferAppendMemBufferCopyExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_append_mem_buffer_copy_exp_params_t { ur_exp_command_buffer_handle_t *phCommandBuffer; ur_mem_handle_t *phSrcMem; ur_mem_handle_t *phDstMem; @@ -10054,13 +10532,13 @@ typedef struct ur_command_buffer_append_membuffer_copy_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_membuffer_copy_exp_params_t; +} ur_command_buffer_append_mem_buffer_copy_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendMembufferWriteExp +/// @brief Function parameters for urCommandBufferAppendMemBufferWriteExp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_membuffer_write_exp_params_t { +typedef struct ur_command_buffer_append_mem_buffer_write_exp_params_t { ur_exp_command_buffer_handle_t *phCommandBuffer; ur_mem_handle_t 
*phBuffer; size_t *poffset; @@ -10069,13 +10547,13 @@ typedef struct ur_command_buffer_append_membuffer_write_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_membuffer_write_exp_params_t; +} ur_command_buffer_append_mem_buffer_write_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendMembufferReadExp +/// @brief Function parameters for urCommandBufferAppendMemBufferReadExp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_membuffer_read_exp_params_t { +typedef struct ur_command_buffer_append_mem_buffer_read_exp_params_t { ur_exp_command_buffer_handle_t *phCommandBuffer; ur_mem_handle_t *phBuffer; size_t *poffset; @@ -10084,13 +10562,13 @@ typedef struct ur_command_buffer_append_membuffer_read_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_membuffer_read_exp_params_t; +} ur_command_buffer_append_mem_buffer_read_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendMembufferCopyRectExp +/// @brief Function parameters for urCommandBufferAppendMemBufferCopyRectExp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_membuffer_copy_rect_exp_params_t { +typedef struct ur_command_buffer_append_mem_buffer_copy_rect_exp_params_t { ur_exp_command_buffer_handle_t *phCommandBuffer; ur_mem_handle_t *phSrcMem; ur_mem_handle_t 
*phDstMem; @@ -10104,13 +10582,13 @@ typedef struct ur_command_buffer_append_membuffer_copy_rect_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_membuffer_copy_rect_exp_params_t; +} ur_command_buffer_append_mem_buffer_copy_rect_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendMembufferWriteRectExp +/// @brief Function parameters for urCommandBufferAppendMemBufferWriteRectExp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_membuffer_write_rect_exp_params_t { +typedef struct ur_command_buffer_append_mem_buffer_write_rect_exp_params_t { ur_exp_command_buffer_handle_t *phCommandBuffer; ur_mem_handle_t *phBuffer; ur_rect_offset_t *pbufferOffset; @@ -10124,13 +10602,13 @@ typedef struct ur_command_buffer_append_membuffer_write_rect_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_membuffer_write_rect_exp_params_t; +} ur_command_buffer_append_mem_buffer_write_rect_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendMembufferReadRectExp +/// @brief Function parameters for urCommandBufferAppendMemBufferReadRectExp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_membuffer_read_rect_exp_params_t { +typedef struct ur_command_buffer_append_mem_buffer_read_rect_exp_params_t { ur_exp_command_buffer_handle_t 
*phCommandBuffer; ur_mem_handle_t *phBuffer; ur_rect_offset_t *pbufferOffset; @@ -10144,7 +10622,51 @@ typedef struct ur_command_buffer_append_membuffer_read_rect_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_membuffer_read_rect_exp_params_t; +} ur_command_buffer_append_mem_buffer_read_rect_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferAppendMemBufferFillExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_append_mem_buffer_fill_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + ur_mem_handle_t *phBuffer; + const void **ppPattern; + size_t *ppatternSize; + size_t *poffset; + size_t *psize; + uint32_t *pnumSyncPointsInWaitList; + const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + ur_exp_command_buffer_sync_point_t **ppSyncPoint; +} ur_command_buffer_append_mem_buffer_fill_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferAppendUSMPrefetchExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_append_usm_prefetch_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + const void **ppMemory; + size_t *psize; + ur_usm_migration_flags_t *pflags; + uint32_t *pnumSyncPointsInWaitList; + const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + ur_exp_command_buffer_sync_point_t **ppSyncPoint; +} ur_command_buffer_append_usm_prefetch_exp_params_t; + 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferAppendUSMAdviseExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_append_usm_advise_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + const void **ppMemory; + size_t *psize; + ur_usm_advice_flags_t *padvice; + uint32_t *pnumSyncPointsInWaitList; + const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + ur_exp_command_buffer_sync_point_t **ppSyncPoint; +} ur_command_buffer_append_usm_advise_exp_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urCommandBufferEnqueueExp @@ -10190,69 +10712,13 @@ typedef struct ur_usm_p2p_peer_access_get_info_exp_params_t { } ur_usm_p2p_peer_access_get_info_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urInit +/// @brief Function parameters for urLoaderInit /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_init_params_t { +typedef struct ur_loader_init_params_t { ur_device_init_flags_t *pdevice_flags; ur_loader_config_handle_t *phLoaderConfig; -} ur_init_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urTearDown -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_tear_down_params_t { - void **ppParams; -} ur_tear_down_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urAdapterGet -/// @details Each 
entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_adapter_get_params_t { - uint32_t *pNumEntries; - ur_adapter_handle_t **pphAdapters; - uint32_t **ppNumAdapters; -} ur_adapter_get_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urAdapterRelease -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_adapter_release_params_t { - ur_adapter_handle_t *phAdapter; -} ur_adapter_release_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urAdapterRetain -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_adapter_retain_params_t { - ur_adapter_handle_t *phAdapter; -} ur_adapter_retain_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urAdapterGetLastError -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_adapter_get_last_error_params_t { - ur_adapter_handle_t *phAdapter; - const char ***pppMessage; - int32_t **ppError; -} ur_adapter_get_last_error_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urAdapterGetInfo -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_adapter_get_info_params_t { - ur_adapter_handle_t *phAdapter; - ur_adapter_info_t *ppropName; - size_t *ppropSize; - void **ppPropValue; - 
size_t **ppPropSizeRet; -} ur_adapter_get_info_params_t; +} ur_loader_init_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urVirtualMemGranularityGetInfo diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 24d5427191..92fc742f72 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -7,7 +7,7 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * * @file ur_ddi.h - * @version v0.7-r0 + * @version v0.9-r0 * */ #ifndef UR_DDI_H_INCLUDED @@ -623,6 +623,39 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetKernelProcAddrTable_t)( ur_api_version_t, ur_kernel_dditable_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urKernelSuggestMaxCooperativeGroupCountExp +typedef ur_result_t(UR_APICALL *ur_pfnKernelSuggestMaxCooperativeGroupCountExp_t)( + ur_kernel_handle_t, + uint32_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of KernelExp functions pointers +typedef struct ur_kernel_exp_dditable_t { + ur_pfnKernelSuggestMaxCooperativeGroupCountExp_t pfnSuggestMaxCooperativeGroupCountExp; +} ur_kernel_exp_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's KernelExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL +urGetKernelExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_kernel_exp_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urGetKernelExpProcAddrTable +typedef ur_result_t(UR_APICALL 
*ur_pfnGetKernelExpProcAddrTable_t)( + ur_api_version_t, + ur_kernel_exp_dditable_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urSamplerCreate typedef ur_result_t(UR_APICALL *ur_pfnSamplerCreate_t)( @@ -859,6 +892,70 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetPhysicalMemProcAddrTable_t)( ur_api_version_t, ur_physical_mem_dditable_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urAdapterGet +typedef ur_result_t(UR_APICALL *ur_pfnAdapterGet_t)( + uint32_t, + ur_adapter_handle_t *, + uint32_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urAdapterRelease +typedef ur_result_t(UR_APICALL *ur_pfnAdapterRelease_t)( + ur_adapter_handle_t); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urAdapterRetain +typedef ur_result_t(UR_APICALL *ur_pfnAdapterRetain_t)( + ur_adapter_handle_t); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urAdapterGetLastError +typedef ur_result_t(UR_APICALL *ur_pfnAdapterGetLastError_t)( + ur_adapter_handle_t, + const char **, + int32_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urAdapterGetInfo +typedef ur_result_t(UR_APICALL *ur_pfnAdapterGetInfo_t)( + ur_adapter_handle_t, + ur_adapter_info_t, + size_t, + void *, + size_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of Global functions pointers +typedef struct ur_global_dditable_t { + ur_pfnAdapterGet_t pfnAdapterGet; + ur_pfnAdapterRelease_t pfnAdapterRelease; + ur_pfnAdapterRetain_t pfnAdapterRetain; + ur_pfnAdapterGetLastError_t pfnAdapterGetLastError; + ur_pfnAdapterGetInfo_t pfnAdapterGetInfo; +} 
ur_global_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's Global table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL +urGetGlobalProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_global_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urGetGlobalProcAddrTable +typedef ur_result_t(UR_APICALL *ur_pfnGetGlobalProcAddrTable_t)( + ur_api_version_t, + ur_global_dditable_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urEnqueueKernelLaunch typedef ur_result_t(UR_APICALL *ur_pfnEnqueueKernelLaunch_t)( @@ -1238,6 +1335,46 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetEnqueueProcAddrTable_t)( ur_api_version_t, ur_enqueue_dditable_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueCooperativeKernelLaunchExp +typedef ur_result_t(UR_APICALL *ur_pfnEnqueueCooperativeKernelLaunchExp_t)( + ur_queue_handle_t, + ur_kernel_handle_t, + uint32_t, + const size_t *, + const size_t *, + const size_t *, + uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of EnqueueExp functions pointers +typedef struct ur_enqueue_exp_dditable_t { + ur_pfnEnqueueCooperativeKernelLaunchExp_t pfnCooperativeKernelLaunchExp; +} ur_enqueue_exp_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling 
application's EnqueueExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL +urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_enqueue_exp_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urGetEnqueueExpProcAddrTable +typedef ur_result_t(UR_APICALL *ur_pfnGetEnqueueExpProcAddrTable_t)( + ur_api_version_t, + ur_enqueue_exp_dditable_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urQueueGetInfo typedef ur_result_t(UR_APICALL *ur_pfnQueueGetInfo_t)( @@ -1719,19 +1856,31 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendKernelLaunchExp_t)( ur_exp_command_buffer_sync_point_t *); /////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urCommandBufferAppendMemcpyUSMExp -typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemcpyUSMExp_t)( +/// @brief Function-pointer for urCommandBufferAppendUSMMemcpyExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendUSMMemcpyExp_t)( + ur_exp_command_buffer_handle_t, + void *, + const void *, + size_t, + uint32_t, + const ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_sync_point_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferAppendUSMFillExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendUSMFillExp_t)( ur_exp_command_buffer_handle_t, void *, const void *, size_t, + size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *); 
/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urCommandBufferAppendMembufferCopyExp -typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferCopyExp_t)( +/// @brief Function-pointer for urCommandBufferAppendMemBufferCopyExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferCopyExp_t)( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, @@ -1743,8 +1892,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferCopyExp_t)( ur_exp_command_buffer_sync_point_t *); /////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urCommandBufferAppendMembufferWriteExp -typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferWriteExp_t)( +/// @brief Function-pointer for urCommandBufferAppendMemBufferWriteExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferWriteExp_t)( ur_exp_command_buffer_handle_t, ur_mem_handle_t, size_t, @@ -1755,8 +1904,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferWriteExp_t)( ur_exp_command_buffer_sync_point_t *); /////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urCommandBufferAppendMembufferReadExp -typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferReadExp_t)( +/// @brief Function-pointer for urCommandBufferAppendMemBufferReadExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferReadExp_t)( ur_exp_command_buffer_handle_t, ur_mem_handle_t, size_t, @@ -1767,8 +1916,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferReadExp_t)( ur_exp_command_buffer_sync_point_t *); /////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urCommandBufferAppendMembufferCopyRectExp -typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferCopyRectExp_t)( +/// @brief 
Function-pointer for urCommandBufferAppendMemBufferCopyRectExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferCopyRectExp_t)( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, @@ -1784,8 +1933,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferCopyRectExp_t) ur_exp_command_buffer_sync_point_t *); /////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urCommandBufferAppendMembufferWriteRectExp -typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferWriteRectExp_t)( +/// @brief Function-pointer for urCommandBufferAppendMemBufferWriteRectExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferWriteRectExp_t)( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, @@ -1801,8 +1950,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferWriteRectExp_t ur_exp_command_buffer_sync_point_t *); /////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urCommandBufferAppendMembufferReadRectExp -typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferReadRectExp_t)( +/// @brief Function-pointer for urCommandBufferAppendMemBufferReadRectExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferReadRectExp_t)( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, @@ -1817,6 +1966,41 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferReadRectExp_t) const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferAppendMemBufferFillExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferFillExp_t)( + ur_exp_command_buffer_handle_t, + ur_mem_handle_t, + const void *, + size_t, + size_t, + size_t, + uint32_t, + const ur_exp_command_buffer_sync_point_t 
*, + ur_exp_command_buffer_sync_point_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferAppendUSMPrefetchExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendUSMPrefetchExp_t)( + ur_exp_command_buffer_handle_t, + const void *, + size_t, + ur_usm_migration_flags_t, + uint32_t, + const ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_sync_point_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferAppendUSMAdviseExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendUSMAdviseExp_t)( + ur_exp_command_buffer_handle_t, + const void *, + size_t, + ur_usm_advice_flags_t, + uint32_t, + const ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_sync_point_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferEnqueueExp typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferEnqueueExp_t)( @@ -1834,13 +2018,17 @@ typedef struct ur_command_buffer_exp_dditable_t { ur_pfnCommandBufferReleaseExp_t pfnReleaseExp; ur_pfnCommandBufferFinalizeExp_t pfnFinalizeExp; ur_pfnCommandBufferAppendKernelLaunchExp_t pfnAppendKernelLaunchExp; - ur_pfnCommandBufferAppendMemcpyUSMExp_t pfnAppendMemcpyUSMExp; - ur_pfnCommandBufferAppendMembufferCopyExp_t pfnAppendMembufferCopyExp; - ur_pfnCommandBufferAppendMembufferWriteExp_t pfnAppendMembufferWriteExp; - ur_pfnCommandBufferAppendMembufferReadExp_t pfnAppendMembufferReadExp; - ur_pfnCommandBufferAppendMembufferCopyRectExp_t pfnAppendMembufferCopyRectExp; - ur_pfnCommandBufferAppendMembufferWriteRectExp_t pfnAppendMembufferWriteRectExp; - ur_pfnCommandBufferAppendMembufferReadRectExp_t pfnAppendMembufferReadRectExp; + ur_pfnCommandBufferAppendUSMMemcpyExp_t pfnAppendUSMMemcpyExp; + ur_pfnCommandBufferAppendUSMFillExp_t pfnAppendUSMFillExp; + 
ur_pfnCommandBufferAppendMemBufferCopyExp_t pfnAppendMemBufferCopyExp; + ur_pfnCommandBufferAppendMemBufferWriteExp_t pfnAppendMemBufferWriteExp; + ur_pfnCommandBufferAppendMemBufferReadExp_t pfnAppendMemBufferReadExp; + ur_pfnCommandBufferAppendMemBufferCopyRectExp_t pfnAppendMemBufferCopyRectExp; + ur_pfnCommandBufferAppendMemBufferWriteRectExp_t pfnAppendMemBufferWriteRectExp; + ur_pfnCommandBufferAppendMemBufferReadRectExp_t pfnAppendMemBufferReadRectExp; + ur_pfnCommandBufferAppendMemBufferFillExp_t pfnAppendMemBufferFillExp; + ur_pfnCommandBufferAppendUSMPrefetchExp_t pfnAppendUSMPrefetchExp; + ur_pfnCommandBufferAppendUSMAdviseExp_t pfnAppendUSMAdviseExp; ur_pfnCommandBufferEnqueueExp_t pfnEnqueueExp; } ur_command_buffer_exp_dditable_t; @@ -1916,83 +2104,6 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetUsmP2PExpProcAddrTable_t)( ur_api_version_t, ur_usm_p2p_exp_dditable_t *); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urInit -typedef ur_result_t(UR_APICALL *ur_pfnInit_t)( - ur_device_init_flags_t, - ur_loader_config_handle_t); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urTearDown -typedef ur_result_t(UR_APICALL *ur_pfnTearDown_t)( - void *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urAdapterGet -typedef ur_result_t(UR_APICALL *ur_pfnAdapterGet_t)( - uint32_t, - ur_adapter_handle_t *, - uint32_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urAdapterRelease -typedef ur_result_t(UR_APICALL *ur_pfnAdapterRelease_t)( - ur_adapter_handle_t); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urAdapterRetain -typedef ur_result_t(UR_APICALL *ur_pfnAdapterRetain_t)( - ur_adapter_handle_t); - 
-/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urAdapterGetLastError -typedef ur_result_t(UR_APICALL *ur_pfnAdapterGetLastError_t)( - ur_adapter_handle_t, - const char **, - int32_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urAdapterGetInfo -typedef ur_result_t(UR_APICALL *ur_pfnAdapterGetInfo_t)( - ur_adapter_handle_t, - ur_adapter_info_t, - size_t, - void *, - size_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Table of Global functions pointers -typedef struct ur_global_dditable_t { - ur_pfnInit_t pfnInit; - ur_pfnTearDown_t pfnTearDown; - ur_pfnAdapterGet_t pfnAdapterGet; - ur_pfnAdapterRelease_t pfnAdapterRelease; - ur_pfnAdapterRetain_t pfnAdapterRetain; - ur_pfnAdapterGetLastError_t pfnAdapterGetLastError; - ur_pfnAdapterGetInfo_t pfnAdapterGetInfo; -} ur_global_dditable_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Exported function for filling application's Global table -/// with current process' addresses -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION -UR_DLLEXPORT ur_result_t UR_APICALL -urGetGlobalProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_global_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers -); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urGetGlobalProcAddrTable -typedef ur_result_t(UR_APICALL *ur_pfnGetGlobalProcAddrTable_t)( - ur_api_version_t, - ur_global_dditable_t *); - /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urVirtualMemGranularityGetInfo typedef 
ur_result_t(UR_APICALL *ur_pfnVirtualMemGranularityGetInfo_t)( @@ -2197,17 +2308,19 @@ typedef struct ur_dditable_t { ur_program_dditable_t Program; ur_program_exp_dditable_t ProgramExp; ur_kernel_dditable_t Kernel; + ur_kernel_exp_dditable_t KernelExp; ur_sampler_dditable_t Sampler; ur_mem_dditable_t Mem; ur_physical_mem_dditable_t PhysicalMem; + ur_global_dditable_t Global; ur_enqueue_dditable_t Enqueue; + ur_enqueue_exp_dditable_t EnqueueExp; ur_queue_dditable_t Queue; ur_bindless_images_exp_dditable_t BindlessImagesExp; ur_usm_dditable_t USM; ur_usm_exp_dditable_t USMExp; ur_command_buffer_exp_dditable_t CommandBufferExp; ur_usm_p2p_exp_dditable_t UsmP2PExp; - ur_global_dditable_t Global; ur_virtual_mem_dditable_t VirtualMem; ur_device_dditable_t Device; } ur_dditable_t; diff --git a/source/common/ur_params.hpp b/include/ur_print.hpp similarity index 50% rename from source/common/ur_params.hpp rename to include/ur_print.hpp index 22b3b3110e..9a0ce9e657 100644 --- a/source/common/ur_params.hpp +++ b/include/ur_print.hpp @@ -6,1866 +6,1472 @@ * See LICENSE.TXT * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * - * @file ur_params.hpp + * @file ur_print.hpp + * @version v0.9-r0 * */ -#ifndef UR_PARAMS_HPP -#define UR_PARAMS_HPP 1 +#ifndef UR_PRINT_HPP +#define UR_PRINT_HPP 1 #include "ur_api.h" #include #include -namespace ur_params { -template struct is_handle : std::false_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct 
is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; +namespace ur::details { +template +struct is_handle : std::false_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; template <> struct is_handle : std::true_type {}; template <> struct is_handle : std::true_type {}; -template inline constexpr bool is_handle_v = is_handle::value; -template inline void serializePtr(std::ostream &os, T *ptr); template -inline void serializeFlag(std::ostream &os, uint32_t flag); +inline constexpr bool is_handle_v = is_handle::value; +template +inline ur_result_t printPtr(std::ostream &os, const T *ptr); +template +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template -inline void serializeTagged(std::ostream &os, const void *ptr, T value, - size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, T value, size_t size); -inline void serializeStruct(std::ostream &os, const void *ptr); 
+inline ur_result_t printStruct(std::ostream &os, const void *ptr); template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_loader_config_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_loader_config_info_t value, size_t size); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_adapter_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_adapter_info_t value, size_t size); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_platform_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_platform_info_t value, size_t size); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_device_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info_t value, size_t size); template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); -inline void serializeUnion(std::ostream &os, - const union ur_device_partition_value_t params, - const enum ur_device_partition_t tag); +inline ur_result_t printUnion( + std::ostream &os, + const union ur_device_partition_value_t params, + const enum ur_device_partition_t tag); template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeFlag(std::ostream 
&os, - uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void -serializeFlag(std::ostream &os, - uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeFlag(std::ostream &os, uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_context_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_context_info_t value, size_t size); template <> -inline void serializeFlag(std::ostream &os, uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_mem_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_mem_info_t value, size_t size); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_image_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_image_info_t value, size_t size); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_sampler_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_sampler_info_t value, size_t size); template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeFlag(std::ostream &os, uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_usm_alloc_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, 
ur_usm_alloc_info_t value, size_t size); template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_usm_pool_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_usm_pool_info_t value, size_t size); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_virtual_mem_granularity_info_t value, - size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_virtual_mem_granularity_info_t value, size_t size); template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_virtual_mem_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_virtual_mem_info_t value, size_t size); template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); -inline void serializeUnion(std::ostream &os, - const union ur_program_metadata_value_t params, - const enum ur_program_metadata_type_t tag); +inline ur_result_t printUnion( + std::ostream &os, + const union ur_program_metadata_value_t params, + const enum ur_program_metadata_type_t tag); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_program_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_program_info_t value, size_t size); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_program_build_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_program_build_info_t value, size_t size); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - 
ur_kernel_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_kernel_info_t value, size_t size); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_kernel_group_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_kernel_group_info_t value, size_t size); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_kernel_sub_group_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_kernel_sub_group_info_t value, size_t size); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_kernel_exec_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_kernel_exec_info_t value, size_t size); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_queue_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_queue_info_t value, size_t size); template <> -inline void serializeFlag(std::ostream &os, uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_event_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_event_info_t value, size_t size); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_profiling_info_t value, size_t size); +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_profiling_info_t value, size_t size); template <> -inline void serializeFlag(std::ostream &os, uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeFlag(std::ostream &os, - uint32_t 
flag); +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_exp_peer_info_t value, size_t size); - -} // namespace ur_params - -inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_structure_type_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_result_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_base_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_base_desc_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_rect_offset_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_rect_region_t params); -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_init_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_loader_config_info_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_adapter_info_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_adapter_backend_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_platform_info_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_api_version_t value); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_platform_native_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - enum ur_platform_backend_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_device_binary_t params); -inline std::ostream &operator<<(std::ostream &os, enum ur_device_type_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_affinity_domain_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_partition_t value); -inline 
std::ostream & -operator<<(std::ostream &os, - const struct ur_device_partition_property_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_device_partition_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_fp_capability_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_mem_cache_type_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_local_mem_type_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_exec_capability_flag_t value); -inline std::ostream & -operator<<(std::ostream &os, const struct ur_device_native_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - enum ur_memory_order_capability_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_memory_scope_capability_flag_t value); -inline std::ostream & -operator<<(std::ostream &os, enum ur_device_usm_access_capability_flag_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_context_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_context_properties_t params); -inline std::ostream &operator<<(std::ostream &os, enum ur_context_info_t value); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_context_native_properties_t params); -inline std::ostream &operator<<(std::ostream &os, enum ur_mem_flag_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_mem_type_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_mem_info_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_image_channel_order_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_image_channel_type_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_image_info_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_image_format_t params); -inline std::ostream 
&operator<<(std::ostream &os, - const struct ur_image_desc_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_buffer_properties_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_buffer_channel_properties_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_buffer_alloc_location_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_buffer_region_t params); -inline std::ostream &operator<<(std::ostream &os, - enum ur_buffer_create_type_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_mem_native_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - enum ur_sampler_filter_mode_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_sampler_addressing_mode_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_sampler_info_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_sampler_desc_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_sampler_native_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - enum ur_usm_host_mem_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_usm_device_mem_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_usm_pool_flag_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_usm_type_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_usm_alloc_info_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_usm_advice_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_desc_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_host_desc_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_device_desc_t params); -inline std::ostream &operator<<(std::ostream &os, - const 
struct ur_usm_pool_desc_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_pool_limits_desc_t params); -inline std::ostream &operator<<(std::ostream &os, - enum ur_usm_pool_info_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_virtual_mem_granularity_info_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_virtual_mem_access_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_virtual_mem_info_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_physical_mem_flag_t value); -inline std::ostream & -operator<<(std::ostream &os, const struct ur_physical_mem_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - enum ur_program_metadata_type_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_program_metadata_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_program_properties_t params); -inline std::ostream &operator<<(std::ostream &os, enum ur_program_info_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_program_build_status_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_program_binary_type_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_program_build_info_t value); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_specialization_constant_info_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_native_properties_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_arg_value_properties_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_arg_local_properties_t params); -inline std::ostream &operator<<(std::ostream &os, enum ur_kernel_info_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_kernel_group_info_t value); -inline std::ostream &operator<<(std::ostream 
&os, - enum ur_kernel_sub_group_info_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_kernel_cache_config_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_kernel_exec_info_t value); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_arg_pointer_properties_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_exec_info_properties_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_arg_sampler_properties_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_arg_mem_obj_properties_t params); -inline std::ostream & -operator<<(std::ostream &os, const struct ur_kernel_native_properties_t params); -inline std::ostream &operator<<(std::ostream &os, enum ur_queue_info_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_queue_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_queue_properties_t params); -inline std::ostream & -operator<<(std::ostream &os, const struct ur_queue_index_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_queue_native_desc_t params); -inline std::ostream & -operator<<(std::ostream &os, const struct ur_queue_native_properties_t params); -inline std::ostream &operator<<(std::ostream &os, enum ur_command_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_event_status_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_event_info_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_profiling_info_t value); -inline std::ostream & -operator<<(std::ostream &os, const struct ur_event_native_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - enum ur_execution_info_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_map_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - enum 
ur_usm_migration_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_exp_image_copy_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_exp_file_descriptor_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_exp_win32_handle_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_exp_sampler_mip_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_exp_interop_mem_desc_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_exp_interop_semaphore_desc_t params); -inline std::ostream & -operator<<(std::ostream &os, const struct ur_exp_command_buffer_desc_t params); -inline std::ostream &operator<<(std::ostream &os, - enum ur_exp_peer_info_t value); - -inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_peer_info_t value, size_t size); + +} // namespace ur::details + +inline std::ostream &operator<<(std::ostream &os, ur_function_t value); +inline std::ostream &operator<<(std::ostream &os, ur_structure_type_t value); +inline std::ostream &operator<<(std::ostream &os, ur_result_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_base_properties_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_base_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_rect_offset_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_rect_region_t params); +inline std::ostream &operator<<(std::ostream &os, ur_device_init_flag_t value); +inline std::ostream &operator<<(std::ostream &os, ur_loader_config_info_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_code_location_t params); +inline std::ostream 
&operator<<(std::ostream &os, ur_adapter_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_adapter_backend_t value); +inline std::ostream &operator<<(std::ostream &os, ur_platform_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_api_version_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_platform_native_properties_t params); +inline std::ostream &operator<<(std::ostream &os, ur_platform_backend_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_device_binary_t params); +inline std::ostream &operator<<(std::ostream &os, ur_device_type_t value); +inline std::ostream &operator<<(std::ostream &os, ur_device_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_device_affinity_domain_flag_t value); +inline std::ostream &operator<<(std::ostream &os, ur_device_partition_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_device_partition_property_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_device_partition_properties_t params); +inline std::ostream &operator<<(std::ostream &os, ur_device_fp_capability_flag_t value); +inline std::ostream &operator<<(std::ostream &os, ur_device_mem_cache_type_t value); +inline std::ostream &operator<<(std::ostream &os, ur_device_local_mem_type_t value); +inline std::ostream &operator<<(std::ostream &os, ur_device_exec_capability_flag_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_device_native_properties_t params); +inline std::ostream &operator<<(std::ostream &os, ur_memory_order_capability_flag_t value); +inline std::ostream &operator<<(std::ostream &os, ur_memory_scope_capability_flag_t value); +inline std::ostream &operator<<(std::ostream &os, ur_device_usm_access_capability_flag_t value); +inline std::ostream &operator<<(std::ostream &os, ur_context_flag_t 
value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_context_properties_t params); +inline std::ostream &operator<<(std::ostream &os, ur_context_info_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_context_native_properties_t params); +inline std::ostream &operator<<(std::ostream &os, ur_mem_flag_t value); +inline std::ostream &operator<<(std::ostream &os, ur_mem_type_t value); +inline std::ostream &operator<<(std::ostream &os, ur_mem_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_image_channel_order_t value); +inline std::ostream &operator<<(std::ostream &os, ur_image_channel_type_t value); +inline std::ostream &operator<<(std::ostream &os, ur_image_info_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_image_format_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_image_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_buffer_properties_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_buffer_channel_properties_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_buffer_alloc_location_properties_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_buffer_region_t params); +inline std::ostream &operator<<(std::ostream &os, ur_buffer_create_type_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_mem_native_properties_t params); +inline std::ostream &operator<<(std::ostream &os, ur_sampler_filter_mode_t value); +inline std::ostream &operator<<(std::ostream &os, ur_sampler_addressing_mode_t value); +inline std::ostream &operator<<(std::ostream &os, ur_sampler_info_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct 
ur_sampler_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_sampler_native_properties_t params); +inline std::ostream &operator<<(std::ostream &os, ur_usm_host_mem_flag_t value); +inline std::ostream &operator<<(std::ostream &os, ur_usm_device_mem_flag_t value); +inline std::ostream &operator<<(std::ostream &os, ur_usm_pool_flag_t value); +inline std::ostream &operator<<(std::ostream &os, ur_usm_type_t value); +inline std::ostream &operator<<(std::ostream &os, ur_usm_alloc_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_usm_advice_flag_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_host_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_device_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_limits_desc_t params); +inline std::ostream &operator<<(std::ostream &os, ur_usm_pool_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_virtual_mem_granularity_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_virtual_mem_access_flag_t value); +inline std::ostream &operator<<(std::ostream &os, ur_virtual_mem_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_physical_mem_flag_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_physical_mem_properties_t params); +inline std::ostream &operator<<(std::ostream &os, ur_program_metadata_type_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_metadata_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct 
ur_program_properties_t params); +inline std::ostream &operator<<(std::ostream &os, ur_program_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_program_build_status_t value); +inline std::ostream &operator<<(std::ostream &os, ur_program_binary_type_t value); +inline std::ostream &operator<<(std::ostream &os, ur_program_build_info_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_specialization_constant_info_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_native_properties_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_arg_value_properties_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_arg_local_properties_t params); +inline std::ostream &operator<<(std::ostream &os, ur_kernel_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_kernel_group_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_kernel_sub_group_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_kernel_cache_config_t value); +inline std::ostream &operator<<(std::ostream &os, ur_kernel_exec_info_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_arg_pointer_properties_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_exec_info_properties_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_arg_sampler_properties_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_arg_mem_obj_properties_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_native_properties_t params); +inline std::ostream &operator<<(std::ostream &os, ur_queue_info_t value); +inline std::ostream 
&operator<<(std::ostream &os, ur_queue_flag_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_queue_properties_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_queue_index_properties_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_queue_native_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_queue_native_properties_t params); +inline std::ostream &operator<<(std::ostream &os, ur_command_t value); +inline std::ostream &operator<<(std::ostream &os, ur_event_status_t value); +inline std::ostream &operator<<(std::ostream &os, ur_event_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_profiling_info_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_event_native_properties_t params); +inline std::ostream &operator<<(std::ostream &os, ur_execution_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_map_flag_t value); +inline std::ostream &operator<<(std::ostream &os, ur_usm_migration_flag_t value); +inline std::ostream &operator<<(std::ostream &os, ur_exp_image_copy_flag_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_file_descriptor_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_win32_handle_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_sampler_mip_properties_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_sampler_addr_modes_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_mem_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_semaphore_desc_t params); +inline std::ostream 
&operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_layered_image_properties_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params); +inline std::ostream &operator<<(std::ostream &os, ur_exp_peer_info_t value); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_function_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_function_t value) { switch (value) { - case UR_FUNCTION_CONTEXT_CREATE: os << "UR_FUNCTION_CONTEXT_CREATE"; break; - case UR_FUNCTION_CONTEXT_RETAIN: os << "UR_FUNCTION_CONTEXT_RETAIN"; break; - case UR_FUNCTION_CONTEXT_RELEASE: os << "UR_FUNCTION_CONTEXT_RELEASE"; break; - case UR_FUNCTION_CONTEXT_GET_INFO: os << "UR_FUNCTION_CONTEXT_GET_INFO"; break; - case UR_FUNCTION_CONTEXT_GET_NATIVE_HANDLE: os << "UR_FUNCTION_CONTEXT_GET_NATIVE_HANDLE"; break; - case UR_FUNCTION_CONTEXT_CREATE_WITH_NATIVE_HANDLE: os << "UR_FUNCTION_CONTEXT_CREATE_WITH_NATIVE_HANDLE"; break; - case UR_FUNCTION_CONTEXT_SET_EXTENDED_DELETER: os << "UR_FUNCTION_CONTEXT_SET_EXTENDED_DELETER"; break; - case UR_FUNCTION_DEVICE_GET: os << "UR_FUNCTION_DEVICE_GET"; break; - case UR_FUNCTION_DEVICE_GET_INFO: os << "UR_FUNCTION_DEVICE_GET_INFO"; break; - case UR_FUNCTION_DEVICE_RETAIN: os << "UR_FUNCTION_DEVICE_RETAIN"; break; - case UR_FUNCTION_DEVICE_RELEASE: os << "UR_FUNCTION_DEVICE_RELEASE"; break; - case UR_FUNCTION_DEVICE_PARTITION: os << "UR_FUNCTION_DEVICE_PARTITION"; break; - case UR_FUNCTION_DEVICE_SELECT_BINARY: os << "UR_FUNCTION_DEVICE_SELECT_BINARY"; break; - case UR_FUNCTION_DEVICE_GET_NATIVE_HANDLE: os << "UR_FUNCTION_DEVICE_GET_NATIVE_HANDLE"; break; - case UR_FUNCTION_DEVICE_CREATE_WITH_NATIVE_HANDLE: os << "UR_FUNCTION_DEVICE_CREATE_WITH_NATIVE_HANDLE"; break; - case UR_FUNCTION_DEVICE_GET_GLOBAL_TIMESTAMPS: os << "UR_FUNCTION_DEVICE_GET_GLOBAL_TIMESTAMPS"; break; - 
case UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH: os << "UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH"; break; - case UR_FUNCTION_ENQUEUE_EVENTS_WAIT: os << "UR_FUNCTION_ENQUEUE_EVENTS_WAIT"; break; - case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER: os << "UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER"; break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_READ: os << "UR_FUNCTION_ENQUEUE_MEM_BUFFER_READ"; break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_WRITE: os << "UR_FUNCTION_ENQUEUE_MEM_BUFFER_WRITE"; break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_READ_RECT: os << "UR_FUNCTION_ENQUEUE_MEM_BUFFER_READ_RECT"; break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_WRITE_RECT: os << "UR_FUNCTION_ENQUEUE_MEM_BUFFER_WRITE_RECT"; break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_COPY: os << "UR_FUNCTION_ENQUEUE_MEM_BUFFER_COPY"; break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_COPY_RECT: os << "UR_FUNCTION_ENQUEUE_MEM_BUFFER_COPY_RECT"; break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_FILL: os << "UR_FUNCTION_ENQUEUE_MEM_BUFFER_FILL"; break; - case UR_FUNCTION_ENQUEUE_MEM_IMAGE_READ: os << "UR_FUNCTION_ENQUEUE_MEM_IMAGE_READ"; break; - case UR_FUNCTION_ENQUEUE_MEM_IMAGE_WRITE: os << "UR_FUNCTION_ENQUEUE_MEM_IMAGE_WRITE"; break; - case UR_FUNCTION_ENQUEUE_MEM_IMAGE_COPY: os << "UR_FUNCTION_ENQUEUE_MEM_IMAGE_COPY"; break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_MAP: os << "UR_FUNCTION_ENQUEUE_MEM_BUFFER_MAP"; break; - case UR_FUNCTION_ENQUEUE_MEM_UNMAP: os << "UR_FUNCTION_ENQUEUE_MEM_UNMAP"; break; - case UR_FUNCTION_ENQUEUE_USM_FILL: os << "UR_FUNCTION_ENQUEUE_USM_FILL"; break; - case UR_FUNCTION_ENQUEUE_USM_MEMCPY: os << "UR_FUNCTION_ENQUEUE_USM_MEMCPY"; break; - case UR_FUNCTION_ENQUEUE_USM_PREFETCH: os << "UR_FUNCTION_ENQUEUE_USM_PREFETCH"; break; - case UR_FUNCTION_ENQUEUE_USM_ADVISE: os << "UR_FUNCTION_ENQUEUE_USM_ADVISE"; break; - case UR_FUNCTION_ENQUEUE_DEVICE_GLOBAL_VARIABLE_WRITE: os << "UR_FUNCTION_ENQUEUE_DEVICE_GLOBAL_VARIABLE_WRITE"; break; - case UR_FUNCTION_ENQUEUE_DEVICE_GLOBAL_VARIABLE_READ: os << 
"UR_FUNCTION_ENQUEUE_DEVICE_GLOBAL_VARIABLE_READ"; break; - case UR_FUNCTION_EVENT_GET_INFO: os << "UR_FUNCTION_EVENT_GET_INFO"; break; - case UR_FUNCTION_EVENT_GET_PROFILING_INFO: os << "UR_FUNCTION_EVENT_GET_PROFILING_INFO"; break; - case UR_FUNCTION_EVENT_WAIT: os << "UR_FUNCTION_EVENT_WAIT"; break; - case UR_FUNCTION_EVENT_RETAIN: os << "UR_FUNCTION_EVENT_RETAIN"; break; - case UR_FUNCTION_EVENT_RELEASE: os << "UR_FUNCTION_EVENT_RELEASE"; break; - case UR_FUNCTION_EVENT_GET_NATIVE_HANDLE: os << "UR_FUNCTION_EVENT_GET_NATIVE_HANDLE"; break; - case UR_FUNCTION_EVENT_CREATE_WITH_NATIVE_HANDLE: os << "UR_FUNCTION_EVENT_CREATE_WITH_NATIVE_HANDLE"; break; - case UR_FUNCTION_EVENT_SET_CALLBACK: os << "UR_FUNCTION_EVENT_SET_CALLBACK"; break; - case UR_FUNCTION_KERNEL_CREATE: os << "UR_FUNCTION_KERNEL_CREATE"; break; - case UR_FUNCTION_KERNEL_SET_ARG_VALUE: os << "UR_FUNCTION_KERNEL_SET_ARG_VALUE"; break; - case UR_FUNCTION_KERNEL_SET_ARG_LOCAL: os << "UR_FUNCTION_KERNEL_SET_ARG_LOCAL"; break; - case UR_FUNCTION_KERNEL_GET_INFO: os << "UR_FUNCTION_KERNEL_GET_INFO"; break; - case UR_FUNCTION_KERNEL_GET_GROUP_INFO: os << "UR_FUNCTION_KERNEL_GET_GROUP_INFO"; break; - case UR_FUNCTION_KERNEL_GET_SUB_GROUP_INFO: os << "UR_FUNCTION_KERNEL_GET_SUB_GROUP_INFO"; break; - case UR_FUNCTION_KERNEL_RETAIN: os << "UR_FUNCTION_KERNEL_RETAIN"; break; - case UR_FUNCTION_KERNEL_RELEASE: os << "UR_FUNCTION_KERNEL_RELEASE"; break; - case UR_FUNCTION_KERNEL_SET_ARG_POINTER: os << "UR_FUNCTION_KERNEL_SET_ARG_POINTER"; break; - case UR_FUNCTION_KERNEL_SET_EXEC_INFO: os << "UR_FUNCTION_KERNEL_SET_EXEC_INFO"; break; - case UR_FUNCTION_KERNEL_SET_ARG_SAMPLER: os << "UR_FUNCTION_KERNEL_SET_ARG_SAMPLER"; break; - case UR_FUNCTION_KERNEL_SET_ARG_MEM_OBJ: os << "UR_FUNCTION_KERNEL_SET_ARG_MEM_OBJ"; break; - case UR_FUNCTION_KERNEL_SET_SPECIALIZATION_CONSTANTS: os << "UR_FUNCTION_KERNEL_SET_SPECIALIZATION_CONSTANTS"; break; - case UR_FUNCTION_KERNEL_GET_NATIVE_HANDLE: os << 
"UR_FUNCTION_KERNEL_GET_NATIVE_HANDLE"; break; - case UR_FUNCTION_KERNEL_CREATE_WITH_NATIVE_HANDLE: os << "UR_FUNCTION_KERNEL_CREATE_WITH_NATIVE_HANDLE"; break; - case UR_FUNCTION_MEM_IMAGE_CREATE: os << "UR_FUNCTION_MEM_IMAGE_CREATE"; break; - case UR_FUNCTION_MEM_BUFFER_CREATE: os << "UR_FUNCTION_MEM_BUFFER_CREATE"; break; - case UR_FUNCTION_MEM_RETAIN: os << "UR_FUNCTION_MEM_RETAIN"; break; - case UR_FUNCTION_MEM_RELEASE: os << "UR_FUNCTION_MEM_RELEASE"; break; - case UR_FUNCTION_MEM_BUFFER_PARTITION: os << "UR_FUNCTION_MEM_BUFFER_PARTITION"; break; - case UR_FUNCTION_MEM_GET_NATIVE_HANDLE: os << "UR_FUNCTION_MEM_GET_NATIVE_HANDLE"; break; - case UR_FUNCTION_ENQUEUE_READ_HOST_PIPE: os << "UR_FUNCTION_ENQUEUE_READ_HOST_PIPE"; break; - case UR_FUNCTION_MEM_GET_INFO: os << "UR_FUNCTION_MEM_GET_INFO"; break; - case UR_FUNCTION_MEM_IMAGE_GET_INFO: os << "UR_FUNCTION_MEM_IMAGE_GET_INFO"; break; - case UR_FUNCTION_PLATFORM_GET: os << "UR_FUNCTION_PLATFORM_GET"; break; - case UR_FUNCTION_PLATFORM_GET_INFO: os << "UR_FUNCTION_PLATFORM_GET_INFO"; break; - case UR_FUNCTION_PLATFORM_GET_API_VERSION: os << "UR_FUNCTION_PLATFORM_GET_API_VERSION"; break; - case UR_FUNCTION_PLATFORM_GET_NATIVE_HANDLE: os << "UR_FUNCTION_PLATFORM_GET_NATIVE_HANDLE"; break; - case UR_FUNCTION_PLATFORM_CREATE_WITH_NATIVE_HANDLE: os << "UR_FUNCTION_PLATFORM_CREATE_WITH_NATIVE_HANDLE"; break; - case UR_FUNCTION_PROGRAM_CREATE_WITH_IL: os << "UR_FUNCTION_PROGRAM_CREATE_WITH_IL"; break; - case UR_FUNCTION_PROGRAM_CREATE_WITH_BINARY: os << "UR_FUNCTION_PROGRAM_CREATE_WITH_BINARY"; break; - case UR_FUNCTION_PROGRAM_BUILD: os << "UR_FUNCTION_PROGRAM_BUILD"; break; - case UR_FUNCTION_PROGRAM_COMPILE: os << "UR_FUNCTION_PROGRAM_COMPILE"; break; - case UR_FUNCTION_PROGRAM_LINK: os << "UR_FUNCTION_PROGRAM_LINK"; break; - case UR_FUNCTION_PROGRAM_RETAIN: os << "UR_FUNCTION_PROGRAM_RETAIN"; break; - case UR_FUNCTION_PROGRAM_RELEASE: os << "UR_FUNCTION_PROGRAM_RELEASE"; break; - case 
UR_FUNCTION_PROGRAM_GET_FUNCTION_POINTER: os << "UR_FUNCTION_PROGRAM_GET_FUNCTION_POINTER"; break; - case UR_FUNCTION_PROGRAM_GET_INFO: os << "UR_FUNCTION_PROGRAM_GET_INFO"; break; - case UR_FUNCTION_PROGRAM_GET_BUILD_INFO: os << "UR_FUNCTION_PROGRAM_GET_BUILD_INFO"; break; - case UR_FUNCTION_PROGRAM_SET_SPECIALIZATION_CONSTANTS: os << "UR_FUNCTION_PROGRAM_SET_SPECIALIZATION_CONSTANTS"; break; - case UR_FUNCTION_PROGRAM_GET_NATIVE_HANDLE: os << "UR_FUNCTION_PROGRAM_GET_NATIVE_HANDLE"; break; - case UR_FUNCTION_PROGRAM_CREATE_WITH_NATIVE_HANDLE: os << "UR_FUNCTION_PROGRAM_CREATE_WITH_NATIVE_HANDLE"; break; - case UR_FUNCTION_QUEUE_GET_INFO: os << "UR_FUNCTION_QUEUE_GET_INFO"; break; - case UR_FUNCTION_QUEUE_CREATE: os << "UR_FUNCTION_QUEUE_CREATE"; break; - case UR_FUNCTION_QUEUE_RETAIN: os << "UR_FUNCTION_QUEUE_RETAIN"; break; - case UR_FUNCTION_QUEUE_RELEASE: os << "UR_FUNCTION_QUEUE_RELEASE"; break; - case UR_FUNCTION_QUEUE_GET_NATIVE_HANDLE: os << "UR_FUNCTION_QUEUE_GET_NATIVE_HANDLE"; break; - case UR_FUNCTION_QUEUE_CREATE_WITH_NATIVE_HANDLE: os << "UR_FUNCTION_QUEUE_CREATE_WITH_NATIVE_HANDLE"; break; - case UR_FUNCTION_QUEUE_FINISH: os << "UR_FUNCTION_QUEUE_FINISH"; break; - case UR_FUNCTION_QUEUE_FLUSH: os << "UR_FUNCTION_QUEUE_FLUSH"; break; - - case UR_FUNCTION_INIT: - os << "UR_FUNCTION_INIT"; - break; - - case UR_FUNCTION_TEAR_DOWN: - os << "UR_FUNCTION_TEAR_DOWN"; - break; - case UR_FUNCTION_SAMPLER_CREATE: os << "UR_FUNCTION_SAMPLER_CREATE"; break; - case UR_FUNCTION_SAMPLER_RETAIN: os << "UR_FUNCTION_SAMPLER_RETAIN"; break; - case UR_FUNCTION_SAMPLER_RELEASE: os << "UR_FUNCTION_SAMPLER_RELEASE"; break; - case UR_FUNCTION_SAMPLER_GET_INFO: os << "UR_FUNCTION_SAMPLER_GET_INFO"; break; - case UR_FUNCTION_SAMPLER_GET_NATIVE_HANDLE: os << "UR_FUNCTION_SAMPLER_GET_NATIVE_HANDLE"; break; - case UR_FUNCTION_SAMPLER_CREATE_WITH_NATIVE_HANDLE: os << "UR_FUNCTION_SAMPLER_CREATE_WITH_NATIVE_HANDLE"; break; - case UR_FUNCTION_USM_HOST_ALLOC: os << 
"UR_FUNCTION_USM_HOST_ALLOC"; break; - case UR_FUNCTION_USM_DEVICE_ALLOC: os << "UR_FUNCTION_USM_DEVICE_ALLOC"; break; - case UR_FUNCTION_USM_SHARED_ALLOC: os << "UR_FUNCTION_USM_SHARED_ALLOC"; break; - case UR_FUNCTION_USM_FREE: os << "UR_FUNCTION_USM_FREE"; break; - case UR_FUNCTION_USM_GET_MEM_ALLOC_INFO: os << "UR_FUNCTION_USM_GET_MEM_ALLOC_INFO"; break; - case UR_FUNCTION_USM_POOL_CREATE: os << "UR_FUNCTION_USM_POOL_CREATE"; break; - case UR_FUNCTION_COMMAND_BUFFER_CREATE_EXP: os << "UR_FUNCTION_COMMAND_BUFFER_CREATE_EXP"; break; - case UR_FUNCTION_PLATFORM_GET_BACKEND_OPTION: os << "UR_FUNCTION_PLATFORM_GET_BACKEND_OPTION"; break; - case UR_FUNCTION_MEM_BUFFER_CREATE_WITH_NATIVE_HANDLE: os << "UR_FUNCTION_MEM_BUFFER_CREATE_WITH_NATIVE_HANDLE"; break; - case UR_FUNCTION_MEM_IMAGE_CREATE_WITH_NATIVE_HANDLE: os << "UR_FUNCTION_MEM_IMAGE_CREATE_WITH_NATIVE_HANDLE"; break; - case UR_FUNCTION_ENQUEUE_WRITE_HOST_PIPE: os << "UR_FUNCTION_ENQUEUE_WRITE_HOST_PIPE"; break; - case UR_FUNCTION_USM_POOL_RETAIN: os << "UR_FUNCTION_USM_POOL_RETAIN"; break; - case UR_FUNCTION_USM_POOL_RELEASE: os << "UR_FUNCTION_USM_POOL_RELEASE"; break; - case UR_FUNCTION_USM_POOL_GET_INFO: os << "UR_FUNCTION_USM_POOL_GET_INFO"; break; - case UR_FUNCTION_COMMAND_BUFFER_RETAIN_EXP: os << "UR_FUNCTION_COMMAND_BUFFER_RETAIN_EXP"; break; - case UR_FUNCTION_COMMAND_BUFFER_RELEASE_EXP: os << "UR_FUNCTION_COMMAND_BUFFER_RELEASE_EXP"; break; - case UR_FUNCTION_COMMAND_BUFFER_FINALIZE_EXP: os << "UR_FUNCTION_COMMAND_BUFFER_FINALIZE_EXP"; break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP: os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP"; break; - case UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP: os << "UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP"; break; - - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP"; - break; - - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP: - os << 
"UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP"; - break; - - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP"; - break; - case UR_FUNCTION_USM_PITCHED_ALLOC_EXP: os << "UR_FUNCTION_USM_PITCHED_ALLOC_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_SAMPLED_IMAGE_HANDLE_DESTROY_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_SAMPLED_IMAGE_HANDLE_DESTROY_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_IMAGE_ALLOCATE_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_IMAGE_ALLOCATE_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_IMAGE_FREE_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_IMAGE_FREE_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_CREATE_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_CREATE_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_SAMPLED_IMAGE_CREATE_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_SAMPLED_IMAGE_CREATE_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_IMAGE_COPY_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_IMAGE_COPY_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_IMAGE_GET_INFO_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_IMAGE_GET_INFO_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_MIPMAP_GET_LEVEL_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_MIPMAP_GET_LEVEL_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_MIPMAP_FREE_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_MIPMAP_FREE_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_IMPORT_OPAQUE_FD_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_IMPORT_OPAQUE_FD_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_ARRAY_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_ARRAY_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_RELEASE_INTEROP_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_RELEASE_INTEROP_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_IMPORT_EXTERNAL_SEMAPHORE_OPAQUE_FD_EXP: - 
os << "UR_FUNCTION_BINDLESS_IMAGES_IMPORT_EXTERNAL_SEMAPHORE_OPAQUE_FD_" - "EXP"; + os << "UR_FUNCTION_BINDLESS_IMAGES_IMPORT_EXTERNAL_SEMAPHORE_OPAQUE_FD_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_DESTROY_EXTERNAL_SEMAPHORE_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_DESTROY_EXTERNAL_SEMAPHORE_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_WAIT_EXTERNAL_SEMAPHORE_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_WAIT_EXTERNAL_SEMAPHORE_EXP"; break; - case UR_FUNCTION_BINDLESS_IMAGES_SIGNAL_EXTERNAL_SEMAPHORE_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_SIGNAL_EXTERNAL_SEMAPHORE_EXP"; break; - case UR_FUNCTION_ENQUEUE_USM_FILL_2D: os << "UR_FUNCTION_ENQUEUE_USM_FILL_2D"; break; - case UR_FUNCTION_ENQUEUE_USM_MEMCPY_2D: os << "UR_FUNCTION_ENQUEUE_USM_MEMCPY_2D"; break; - case UR_FUNCTION_VIRTUAL_MEM_GRANULARITY_GET_INFO: os << "UR_FUNCTION_VIRTUAL_MEM_GRANULARITY_GET_INFO"; break; - case UR_FUNCTION_VIRTUAL_MEM_RESERVE: os << "UR_FUNCTION_VIRTUAL_MEM_RESERVE"; break; - case UR_FUNCTION_VIRTUAL_MEM_FREE: os << "UR_FUNCTION_VIRTUAL_MEM_FREE"; break; - case UR_FUNCTION_VIRTUAL_MEM_MAP: os << "UR_FUNCTION_VIRTUAL_MEM_MAP"; break; - case UR_FUNCTION_VIRTUAL_MEM_UNMAP: os << "UR_FUNCTION_VIRTUAL_MEM_UNMAP"; break; - case UR_FUNCTION_VIRTUAL_MEM_SET_ACCESS: os << "UR_FUNCTION_VIRTUAL_MEM_SET_ACCESS"; break; - case UR_FUNCTION_VIRTUAL_MEM_GET_INFO: os << "UR_FUNCTION_VIRTUAL_MEM_GET_INFO"; break; - case UR_FUNCTION_PHYSICAL_MEM_CREATE: os << "UR_FUNCTION_PHYSICAL_MEM_CREATE"; break; - case UR_FUNCTION_PHYSICAL_MEM_RETAIN: os << "UR_FUNCTION_PHYSICAL_MEM_RETAIN"; break; - case UR_FUNCTION_PHYSICAL_MEM_RELEASE: os << "UR_FUNCTION_PHYSICAL_MEM_RELEASE"; break; - case UR_FUNCTION_USM_IMPORT_EXP: os << "UR_FUNCTION_USM_IMPORT_EXP"; break; - case UR_FUNCTION_USM_RELEASE_EXP: os << "UR_FUNCTION_USM_RELEASE_EXP"; break; - case UR_FUNCTION_USM_P2P_ENABLE_PEER_ACCESS_EXP: os << "UR_FUNCTION_USM_P2P_ENABLE_PEER_ACCESS_EXP"; break; - case UR_FUNCTION_USM_P2P_DISABLE_PEER_ACCESS_EXP: os << 
"UR_FUNCTION_USM_P2P_DISABLE_PEER_ACCESS_EXP"; break; - case UR_FUNCTION_USM_P2P_PEER_ACCESS_GET_INFO_EXP: os << "UR_FUNCTION_USM_P2P_PEER_ACCESS_GET_INFO_EXP"; break; - - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP"; - break; - - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP"; - break; - - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP"; - break; - - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP"; - break; - case UR_FUNCTION_LOADER_CONFIG_CREATE: os << "UR_FUNCTION_LOADER_CONFIG_CREATE"; break; - case UR_FUNCTION_LOADER_CONFIG_RELEASE: os << "UR_FUNCTION_LOADER_CONFIG_RELEASE"; break; - case UR_FUNCTION_LOADER_CONFIG_RETAIN: os << "UR_FUNCTION_LOADER_CONFIG_RETAIN"; break; - case UR_FUNCTION_LOADER_CONFIG_GET_INFO: os << "UR_FUNCTION_LOADER_CONFIG_GET_INFO"; break; - case UR_FUNCTION_LOADER_CONFIG_ENABLE_LAYER: os << "UR_FUNCTION_LOADER_CONFIG_ENABLE_LAYER"; break; - case UR_FUNCTION_ADAPTER_RELEASE: os << "UR_FUNCTION_ADAPTER_RELEASE"; break; - case UR_FUNCTION_ADAPTER_GET: os << "UR_FUNCTION_ADAPTER_GET"; break; - case UR_FUNCTION_ADAPTER_RETAIN: os << "UR_FUNCTION_ADAPTER_RETAIN"; break; - case UR_FUNCTION_ADAPTER_GET_LAST_ERROR: os << "UR_FUNCTION_ADAPTER_GET_LAST_ERROR"; break; - case UR_FUNCTION_ADAPTER_GET_INFO: os << "UR_FUNCTION_ADAPTER_GET_INFO"; break; - case UR_FUNCTION_PROGRAM_BUILD_EXP: os << "UR_FUNCTION_PROGRAM_BUILD_EXP"; break; - case UR_FUNCTION_PROGRAM_COMPILE_EXP: os << "UR_FUNCTION_PROGRAM_COMPILE_EXP"; break; - case UR_FUNCTION_PROGRAM_LINK_EXP: os << "UR_FUNCTION_PROGRAM_LINK_EXP"; break; + case UR_FUNCTION_LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK: + os << "UR_FUNCTION_LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK"; + break; + 
case UR_FUNCTION_LOADER_INIT: + os << "UR_FUNCTION_LOADER_INIT"; + break; + case UR_FUNCTION_LOADER_TEAR_DOWN: + os << "UR_FUNCTION_LOADER_TEAR_DOWN"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP"; + break; + case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: + os << "UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP"; + break; + case UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP: + os << "UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP"; + break; default: os << "unknown enumerator"; break; } return os; } -inline 
std::ostream &operator<<(std::ostream &os, - enum ur_structure_type_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_structure_type_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_structure_type_t value) { switch (value) { - case UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES: os << "UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_IMAGE_DESC: os << "UR_STRUCTURE_TYPE_IMAGE_DESC"; break; - case UR_STRUCTURE_TYPE_BUFFER_PROPERTIES: os << "UR_STRUCTURE_TYPE_BUFFER_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_BUFFER_REGION: os << "UR_STRUCTURE_TYPE_BUFFER_REGION"; break; - case UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES: os << "UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES: os << "UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES: os << "UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_USM_DESC: os << "UR_STRUCTURE_TYPE_USM_DESC"; break; - case UR_STRUCTURE_TYPE_USM_HOST_DESC: os << "UR_STRUCTURE_TYPE_USM_HOST_DESC"; break; - case UR_STRUCTURE_TYPE_USM_DEVICE_DESC: os << "UR_STRUCTURE_TYPE_USM_DEVICE_DESC"; break; - case UR_STRUCTURE_TYPE_USM_POOL_DESC: os << "UR_STRUCTURE_TYPE_USM_POOL_DESC"; break; - case UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC: os << "UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC"; break; - case UR_STRUCTURE_TYPE_DEVICE_BINARY: os << "UR_STRUCTURE_TYPE_DEVICE_BINARY"; break; - case UR_STRUCTURE_TYPE_SAMPLER_DESC: os << "UR_STRUCTURE_TYPE_SAMPLER_DESC"; break; - case UR_STRUCTURE_TYPE_QUEUE_PROPERTIES: os << "UR_STRUCTURE_TYPE_QUEUE_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES: os << "UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES: os << "UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES"; break; - case 
UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES: os << "UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES: os << "UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES: os << "UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES: os << "UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES: os << "UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES: os << "UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES: os << "UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES: os << "UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC: os << "UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC"; break; - case UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES: os << "UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES: os << "UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES: os << "UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES: os << "UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES: os << "UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES: os << "UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES: os << "UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES: os << "UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC"; 
break; - case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: os << "UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC: os << "UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC"; break; - case UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC: os << "UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC"; break; - case UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR: os << "UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR"; break; - case UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE: os << "UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE"; break; + case UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES: + os << "UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES"; + break; + case UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES: + os << "UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES"; + break; default: os << "unknown enumerator"; break; } return os; } -namespace ur_params { -inline void serializeStruct(std::ostream &os, const void *ptr) { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_structure_type_t struct +inline ur_result_t printStruct(std::ostream &os, const void *ptr) { if (ptr == NULL) { - ur_params::serializePtr(os, ptr); - return; + return printPtr(os, ptr); } - enum ur_structure_type_t *value = (enum ur_structure_type_t *)ptr; + const enum ur_structure_type_t *value = (const enum ur_structure_type_t *)ptr; switch (*value) { case UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES: { - const ur_context_properties_t *pstruct = - (const ur_context_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_context_properties_t *pstruct = (const ur_context_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_IMAGE_DESC: { const ur_image_desc_t *pstruct = (const ur_image_desc_t *)ptr; - ur_params::serializePtr(os, pstruct); + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_BUFFER_PROPERTIES: { - const ur_buffer_properties_t *pstruct = - (const ur_buffer_properties_t *)ptr; - 
ur_params::serializePtr(os, pstruct); + const ur_buffer_properties_t *pstruct = (const ur_buffer_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_BUFFER_REGION: { const ur_buffer_region_t *pstruct = (const ur_buffer_region_t *)ptr; - ur_params::serializePtr(os, pstruct); + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES: { - const ur_buffer_channel_properties_t *pstruct = - (const ur_buffer_channel_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_buffer_channel_properties_t *pstruct = (const ur_buffer_channel_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES: { - const ur_buffer_alloc_location_properties_t *pstruct = - (const ur_buffer_alloc_location_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_buffer_alloc_location_properties_t *pstruct = (const ur_buffer_alloc_location_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES: { - const ur_program_properties_t *pstruct = - (const ur_program_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_program_properties_t *pstruct = (const ur_program_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_USM_DESC: { const ur_usm_desc_t *pstruct = (const ur_usm_desc_t *)ptr; - ur_params::serializePtr(os, pstruct); + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_USM_HOST_DESC: { const ur_usm_host_desc_t *pstruct = (const ur_usm_host_desc_t *)ptr; - ur_params::serializePtr(os, pstruct); + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_USM_DEVICE_DESC: { const ur_usm_device_desc_t *pstruct = (const ur_usm_device_desc_t *)ptr; - ur_params::serializePtr(os, pstruct); + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_USM_POOL_DESC: { const ur_usm_pool_desc_t *pstruct = (const ur_usm_pool_desc_t *)ptr; - ur_params::serializePtr(os, pstruct); + printPtr(os, 
pstruct); } break; case UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC: { - const ur_usm_pool_limits_desc_t *pstruct = - (const ur_usm_pool_limits_desc_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_usm_pool_limits_desc_t *pstruct = (const ur_usm_pool_limits_desc_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_DEVICE_BINARY: { const ur_device_binary_t *pstruct = (const ur_device_binary_t *)ptr; - ur_params::serializePtr(os, pstruct); + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_SAMPLER_DESC: { const ur_sampler_desc_t *pstruct = (const ur_sampler_desc_t *)ptr; - ur_params::serializePtr(os, pstruct); + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_QUEUE_PROPERTIES: { - const ur_queue_properties_t *pstruct = - (const ur_queue_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_queue_properties_t *pstruct = (const ur_queue_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES: { - const ur_queue_index_properties_t *pstruct = - (const ur_queue_index_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_queue_index_properties_t *pstruct = (const ur_queue_index_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES: { - const ur_context_native_properties_t *pstruct = - (const ur_context_native_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_context_native_properties_t *pstruct = (const ur_context_native_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES: { - const ur_kernel_native_properties_t *pstruct = - (const ur_kernel_native_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_kernel_native_properties_t *pstruct = (const ur_kernel_native_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES: { - const ur_queue_native_properties_t *pstruct = - (const 
ur_queue_native_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_queue_native_properties_t *pstruct = (const ur_queue_native_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES: { - const ur_mem_native_properties_t *pstruct = - (const ur_mem_native_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_mem_native_properties_t *pstruct = (const ur_mem_native_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES: { - const ur_event_native_properties_t *pstruct = - (const ur_event_native_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_event_native_properties_t *pstruct = (const ur_event_native_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES: { - const ur_platform_native_properties_t *pstruct = - (const ur_platform_native_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_platform_native_properties_t *pstruct = (const ur_platform_native_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES: { - const ur_device_native_properties_t *pstruct = - (const ur_device_native_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_device_native_properties_t *pstruct = (const ur_device_native_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES: { - const ur_program_native_properties_t *pstruct = - (const ur_program_native_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_program_native_properties_t *pstruct = (const ur_program_native_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES: { - const ur_sampler_native_properties_t *pstruct = - (const ur_sampler_native_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_sampler_native_properties_t *pstruct = 
(const ur_sampler_native_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC: { - const ur_queue_native_desc_t *pstruct = - (const ur_queue_native_desc_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_queue_native_desc_t *pstruct = (const ur_queue_native_desc_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES: { - const ur_device_partition_properties_t *pstruct = - (const ur_device_partition_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_device_partition_properties_t *pstruct = (const ur_device_partition_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES: { - const ur_kernel_arg_mem_obj_properties_t *pstruct = - (const ur_kernel_arg_mem_obj_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_kernel_arg_mem_obj_properties_t *pstruct = (const ur_kernel_arg_mem_obj_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES: { - const ur_physical_mem_properties_t *pstruct = - (const ur_physical_mem_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_physical_mem_properties_t *pstruct = (const ur_physical_mem_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES: { - const ur_kernel_arg_pointer_properties_t *pstruct = - (const ur_kernel_arg_pointer_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_kernel_arg_pointer_properties_t *pstruct = (const ur_kernel_arg_pointer_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES: { - const ur_kernel_arg_sampler_properties_t *pstruct = - (const ur_kernel_arg_sampler_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_kernel_arg_sampler_properties_t *pstruct = (const ur_kernel_arg_sampler_properties_t *)ptr; + printPtr(os, pstruct); } 
break; case UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES: { - const ur_kernel_exec_info_properties_t *pstruct = - (const ur_kernel_exec_info_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_kernel_exec_info_properties_t *pstruct = (const ur_kernel_exec_info_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES: { - const ur_kernel_arg_value_properties_t *pstruct = - (const ur_kernel_arg_value_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_kernel_arg_value_properties_t *pstruct = (const ur_kernel_arg_value_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES: { - const ur_kernel_arg_local_properties_t *pstruct = - (const ur_kernel_arg_local_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_kernel_arg_local_properties_t *pstruct = (const ur_kernel_arg_local_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: { - const ur_exp_command_buffer_desc_t *pstruct = - (const ur_exp_command_buffer_desc_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_exp_command_buffer_desc_t *pstruct = (const ur_exp_command_buffer_desc_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: { - const ur_exp_sampler_mip_properties_t *pstruct = - (const ur_exp_sampler_mip_properties_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_exp_sampler_mip_properties_t *pstruct = (const ur_exp_sampler_mip_properties_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC: { - const ur_exp_interop_mem_desc_t *pstruct = - (const ur_exp_interop_mem_desc_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_exp_interop_mem_desc_t *pstruct = (const ur_exp_interop_mem_desc_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC: { - const 
ur_exp_interop_semaphore_desc_t *pstruct = - (const ur_exp_interop_semaphore_desc_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_exp_interop_semaphore_desc_t *pstruct = (const ur_exp_interop_semaphore_desc_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR: { - const ur_exp_file_descriptor_t *pstruct = - (const ur_exp_file_descriptor_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_exp_file_descriptor_t *pstruct = (const ur_exp_file_descriptor_t *)ptr; + printPtr(os, pstruct); } break; case UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE: { - const ur_exp_win32_handle_t *pstruct = - (const ur_exp_win32_handle_t *)ptr; - ur_params::serializePtr(os, pstruct); + const ur_exp_win32_handle_t *pstruct = (const ur_exp_win32_handle_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES: { + const ur_exp_layered_image_properties_t *pstruct = (const ur_exp_layered_image_properties_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES: { + const ur_exp_sampler_addr_modes_t *pstruct = (const ur_exp_sampler_addr_modes_t *)ptr; + printPtr(os, pstruct); } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, enum ur_result_t value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_result_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_result_t value) { switch (value) { - case UR_RESULT_SUCCESS: os << "UR_RESULT_SUCCESS"; break; - case UR_RESULT_ERROR_INVALID_OPERATION: os << "UR_RESULT_ERROR_INVALID_OPERATION"; break; - case UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES: os << "UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES"; break; - case 
UR_RESULT_ERROR_INVALID_QUEUE: os << "UR_RESULT_ERROR_INVALID_QUEUE"; break; - case UR_RESULT_ERROR_INVALID_VALUE: os << "UR_RESULT_ERROR_INVALID_VALUE"; break; - case UR_RESULT_ERROR_INVALID_CONTEXT: os << "UR_RESULT_ERROR_INVALID_CONTEXT"; break; - case UR_RESULT_ERROR_INVALID_PLATFORM: os << "UR_RESULT_ERROR_INVALID_PLATFORM"; break; - case UR_RESULT_ERROR_INVALID_BINARY: os << "UR_RESULT_ERROR_INVALID_BINARY"; break; - case UR_RESULT_ERROR_INVALID_PROGRAM: os << "UR_RESULT_ERROR_INVALID_PROGRAM"; break; - case UR_RESULT_ERROR_INVALID_SAMPLER: os << "UR_RESULT_ERROR_INVALID_SAMPLER"; break; - case UR_RESULT_ERROR_INVALID_BUFFER_SIZE: os << "UR_RESULT_ERROR_INVALID_BUFFER_SIZE"; break; - case UR_RESULT_ERROR_INVALID_MEM_OBJECT: os << "UR_RESULT_ERROR_INVALID_MEM_OBJECT"; break; - case UR_RESULT_ERROR_INVALID_EVENT: os << "UR_RESULT_ERROR_INVALID_EVENT"; break; - case UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: os << "UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST"; break; - case UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET: os << "UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET"; break; - case UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE: os << "UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE"; break; - case UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE: os << "UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE"; break; - case UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE: os << "UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE"; break; - case UR_RESULT_ERROR_DEVICE_NOT_FOUND: os << "UR_RESULT_ERROR_DEVICE_NOT_FOUND"; break; - case UR_RESULT_ERROR_INVALID_DEVICE: os << "UR_RESULT_ERROR_INVALID_DEVICE"; break; - case UR_RESULT_ERROR_DEVICE_LOST: os << "UR_RESULT_ERROR_DEVICE_LOST"; break; - case UR_RESULT_ERROR_DEVICE_REQUIRES_RESET: os << "UR_RESULT_ERROR_DEVICE_REQUIRES_RESET"; break; - case UR_RESULT_ERROR_DEVICE_IN_LOW_POWER_STATE: os << "UR_RESULT_ERROR_DEVICE_IN_LOW_POWER_STATE"; break; - case UR_RESULT_ERROR_DEVICE_PARTITION_FAILED: os << "UR_RESULT_ERROR_DEVICE_PARTITION_FAILED"; break; - case 
UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT: os << "UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT"; break; - case UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE: os << "UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE"; break; - case UR_RESULT_ERROR_INVALID_WORK_DIMENSION: os << "UR_RESULT_ERROR_INVALID_WORK_DIMENSION"; break; - case UR_RESULT_ERROR_INVALID_KERNEL_ARGS: os << "UR_RESULT_ERROR_INVALID_KERNEL_ARGS"; break; - case UR_RESULT_ERROR_INVALID_KERNEL: os << "UR_RESULT_ERROR_INVALID_KERNEL"; break; - case UR_RESULT_ERROR_INVALID_KERNEL_NAME: os << "UR_RESULT_ERROR_INVALID_KERNEL_NAME"; break; - case UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX: os << "UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX"; break; - case UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE: os << "UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE"; break; - case UR_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE: os << "UR_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE"; break; - case UR_RESULT_ERROR_INVALID_IMAGE_SIZE: os << "UR_RESULT_ERROR_INVALID_IMAGE_SIZE"; break; - case UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR: os << "UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR"; break; - case UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED: os << "UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED"; break; - case UR_RESULT_ERROR_MEM_OBJECT_ALLOCATION_FAILURE: os << "UR_RESULT_ERROR_MEM_OBJECT_ALLOCATION_FAILURE"; break; - case UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE: os << "UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE"; break; - case UR_RESULT_ERROR_UNINITIALIZED: os << "UR_RESULT_ERROR_UNINITIALIZED"; break; - case UR_RESULT_ERROR_OUT_OF_HOST_MEMORY: os << "UR_RESULT_ERROR_OUT_OF_HOST_MEMORY"; break; - case UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY: os << "UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY"; break; - case UR_RESULT_ERROR_OUT_OF_RESOURCES: os << "UR_RESULT_ERROR_OUT_OF_RESOURCES"; break; - case UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE: os << "UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE"; break; - case 
UR_RESULT_ERROR_PROGRAM_LINK_FAILURE: os << "UR_RESULT_ERROR_PROGRAM_LINK_FAILURE"; break; - case UR_RESULT_ERROR_UNSUPPORTED_VERSION: os << "UR_RESULT_ERROR_UNSUPPORTED_VERSION"; break; - case UR_RESULT_ERROR_UNSUPPORTED_FEATURE: os << "UR_RESULT_ERROR_UNSUPPORTED_FEATURE"; break; - case UR_RESULT_ERROR_INVALID_ARGUMENT: os << "UR_RESULT_ERROR_INVALID_ARGUMENT"; break; - case UR_RESULT_ERROR_INVALID_NULL_HANDLE: os << "UR_RESULT_ERROR_INVALID_NULL_HANDLE"; break; - case UR_RESULT_ERROR_HANDLE_OBJECT_IN_USE: os << "UR_RESULT_ERROR_HANDLE_OBJECT_IN_USE"; break; - case UR_RESULT_ERROR_INVALID_NULL_POINTER: os << "UR_RESULT_ERROR_INVALID_NULL_POINTER"; break; - case UR_RESULT_ERROR_INVALID_SIZE: os << "UR_RESULT_ERROR_INVALID_SIZE"; break; - case UR_RESULT_ERROR_UNSUPPORTED_SIZE: os << "UR_RESULT_ERROR_UNSUPPORTED_SIZE"; break; - case UR_RESULT_ERROR_UNSUPPORTED_ALIGNMENT: os << "UR_RESULT_ERROR_UNSUPPORTED_ALIGNMENT"; break; - case UR_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT: os << "UR_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT"; break; - case UR_RESULT_ERROR_INVALID_ENUMERATION: os << "UR_RESULT_ERROR_INVALID_ENUMERATION"; break; - case UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION: os << "UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION"; break; - case UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT: os << "UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT"; break; - case UR_RESULT_ERROR_INVALID_NATIVE_BINARY: os << "UR_RESULT_ERROR_INVALID_NATIVE_BINARY"; break; - case UR_RESULT_ERROR_INVALID_GLOBAL_NAME: os << "UR_RESULT_ERROR_INVALID_GLOBAL_NAME"; break; - case UR_RESULT_ERROR_INVALID_FUNCTION_NAME: os << "UR_RESULT_ERROR_INVALID_FUNCTION_NAME"; break; - case UR_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION: os << "UR_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION"; break; - case UR_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION: os << "UR_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION"; break; - case UR_RESULT_ERROR_PROGRAM_UNLINKED: os << "UR_RESULT_ERROR_PROGRAM_UNLINKED"; break; - case 
UR_RESULT_ERROR_OVERLAPPING_REGIONS: os << "UR_RESULT_ERROR_OVERLAPPING_REGIONS"; break; - case UR_RESULT_ERROR_INVALID_HOST_PTR: os << "UR_RESULT_ERROR_INVALID_HOST_PTR"; break; - case UR_RESULT_ERROR_INVALID_USM_SIZE: os << "UR_RESULT_ERROR_INVALID_USM_SIZE"; break; - case UR_RESULT_ERROR_OBJECT_ALLOCATION_FAILURE: os << "UR_RESULT_ERROR_OBJECT_ALLOCATION_FAILURE"; break; - case UR_RESULT_ERROR_ADAPTER_SPECIFIC: os << "UR_RESULT_ERROR_ADAPTER_SPECIFIC"; break; - case UR_RESULT_ERROR_LAYER_NOT_PRESENT: os << "UR_RESULT_ERROR_LAYER_NOT_PRESENT"; break; - case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP: os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP"; break; - case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP: os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP"; break; - case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP"; break; - case UR_RESULT_ERROR_UNKNOWN: os << "UR_RESULT_ERROR_UNKNOWN"; break; @@ -1875,8 +1481,11 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_result_t value) { } return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_base_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_base_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_base_properties_t params) { os << "(struct ur_base_properties_t){"; os << ".stype = "; @@ -1886,13 +1495,17 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_base_desc_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for 
the ur_base_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_base_desc_t params) { os << "(struct ur_base_desc_t){"; os << ".stype = "; @@ -1902,13 +1515,17 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_rect_offset_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_rect_offset_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_rect_offset_t params) { os << "(struct ur_rect_offset_t){"; os << ".x = "; @@ -1928,8 +1545,11 @@ inline std::ostream &operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_rect_region_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_rect_region_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_rect_region_t params) { os << "(struct ur_rect_region_t){"; os << ".width = "; @@ -1949,26 +1569,24 @@ inline std::ostream &operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_init_flag_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_init_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_device_init_flag_t value) { switch (value) { - case UR_DEVICE_INIT_FLAG_GPU: os << "UR_DEVICE_INIT_FLAG_GPU"; break; - case UR_DEVICE_INIT_FLAG_CPU: os << "UR_DEVICE_INIT_FLAG_CPU"; break; - case UR_DEVICE_INIT_FLAG_FPGA: os << 
"UR_DEVICE_INIT_FLAG_FPGA"; break; - case UR_DEVICE_INIT_FLAG_MCA: os << "UR_DEVICE_INIT_FLAG_MCA"; break; - case UR_DEVICE_INIT_FLAG_VPU: os << "UR_DEVICE_INIT_FLAG_VPU"; break; @@ -1978,11 +1596,12 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_device_init_flag_t flag template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; @@ -2006,8 +1625,7 @@ inline void serializeFlag(std::ostream &os, os << UR_DEVICE_INIT_FLAG_CPU; } - if ((val & UR_DEVICE_INIT_FLAG_FPGA) == - (uint32_t)UR_DEVICE_INIT_FLAG_FPGA) { + if ((val & UR_DEVICE_INIT_FLAG_FPGA) == (uint32_t)UR_DEVICE_INIT_FLAG_FPGA) { val ^= (uint32_t)UR_DEVICE_INIT_FLAG_FPGA; if (!first) { os << " | "; @@ -2045,16 +1663,18 @@ inline void serializeFlag(std::ostream &os, } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_loader_config_info_t value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_loader_config_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_loader_config_info_t value) { switch (value) { - case UR_LOADER_CONFIG_INFO_AVAILABLE_LAYERS: os << "UR_LOADER_CONFIG_INFO_AVAILABLE_LAYERS"; break; - case UR_LOADER_CONFIG_INFO_REFERENCE_COUNT: os << "UR_LOADER_CONFIG_INFO_REFERENCE_COUNT"; break; @@ -2064,31 +1684,28 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_loader_config_info_t enum value template <> 
-inline void serializeTagged(std::ostream &os, const void *ptr, - ur_loader_config_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_loader_config_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_LOADER_CONFIG_INFO_AVAILABLE_LAYERS: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_LOADER_CONFIG_INFO_REFERENCE_COUNT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -2096,18 +1713,52 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_adapter_info_t value) { - switch (value) { +} // namespace ur::details + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_code_location_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_code_location_t params) { + os << "(struct ur_code_location_t){"; + + os << ".functionName = "; + + ur::details::printPtr(os, + (params.functionName)); + + os << ", "; + os << ".sourceFile = "; + ur::details::printPtr(os, + (params.sourceFile)); + + os << ", "; + os << ".lineNumber = "; + + os << (params.lineNumber); + + os << ", "; + os << ".columnNumber = "; + + os << (params.columnNumber); + + os << "}"; + return os; +} 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_adapter_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_adapter_info_t value) { + switch (value) { case UR_ADAPTER_INFO_BACKEND: os << "UR_ADAPTER_INFO_BACKEND"; break; - case UR_ADAPTER_INFO_REFERENCE_COUNT: os << "UR_ADAPTER_INFO_REFERENCE_COUNT"; break; @@ -2117,39 +1768,35 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_adapter_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_adapter_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_adapter_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_ADAPTER_INFO_BACKEND: { const ur_adapter_backend_t *tptr = (const ur_adapter_backend_t *)ptr; if (sizeof(ur_adapter_backend_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_adapter_backend_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_adapter_backend_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_ADAPTER_INFO_REFERENCE_COUNT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -2157,34 +1804,33 @@ inline void 
serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_adapter_backend_t value) { - switch (value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_adapter_backend_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_adapter_backend_t value) { + switch (value) { case UR_ADAPTER_BACKEND_UNKNOWN: os << "UR_ADAPTER_BACKEND_UNKNOWN"; break; - case UR_ADAPTER_BACKEND_LEVEL_ZERO: os << "UR_ADAPTER_BACKEND_LEVEL_ZERO"; break; - case UR_ADAPTER_BACKEND_OPENCL: os << "UR_ADAPTER_BACKEND_OPENCL"; break; - case UR_ADAPTER_BACKEND_CUDA: os << "UR_ADAPTER_BACKEND_CUDA"; break; - case UR_ADAPTER_BACKEND_HIP: os << "UR_ADAPTER_BACKEND_HIP"; break; - case UR_ADAPTER_BACKEND_NATIVE_CPU: os << "UR_ADAPTER_BACKEND_NATIVE_CPU"; break; @@ -2194,30 +1840,27 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_platform_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_platform_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_platform_info_t value) { switch (value) { - case UR_PLATFORM_INFO_NAME: os << "UR_PLATFORM_INFO_NAME"; break; - case UR_PLATFORM_INFO_VENDOR_NAME: os << "UR_PLATFORM_INFO_VENDOR_NAME"; break; - case UR_PLATFORM_INFO_VERSION: os << "UR_PLATFORM_INFO_VERSION"; break; - case UR_PLATFORM_INFO_EXTENSIONS: os << "UR_PLATFORM_INFO_EXTENSIONS"; break; - case UR_PLATFORM_INFO_PROFILE: os << "UR_PLATFORM_INFO_PROFILE"; break; - case UR_PLATFORM_INFO_BACKEND: os << "UR_PLATFORM_INFO_BACKEND"; break; @@ -2227,55 
+1870,48 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_platform_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_platform_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_platform_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_PLATFORM_INFO_NAME: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_PLATFORM_INFO_VENDOR_NAME: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_PLATFORM_INFO_VERSION: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_PLATFORM_INFO_EXTENSIONS: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_PLATFORM_INFO_PROFILE: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_PLATFORM_INFO_BACKEND: { const ur_platform_backend_t *tptr = (const ur_platform_backend_t *)ptr; if (sizeof(ur_platform_backend_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_platform_backend_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_platform_backend_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -2283,17 +1919,25 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream 
&operator<<(std::ostream &os, enum ur_api_version_t value) { +} // namespace ur::details + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_api_version_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_api_version_t value) { os << UR_MAJOR_VERSION(value) << "." << UR_MINOR_VERSION(value); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_platform_native_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_platform_native_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_platform_native_properties_t params) { os << "(struct ur_platform_native_properties_t){"; os << ".stype = "; @@ -2303,7 +1947,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".isNativeHandleOwned = "; @@ -2313,30 +1958,27 @@ operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_platform_backend_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_platform_backend_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_platform_backend_t value) { switch (value) { - case UR_PLATFORM_BACKEND_UNKNOWN: os << "UR_PLATFORM_BACKEND_UNKNOWN"; break; - case UR_PLATFORM_BACKEND_LEVEL_ZERO: os << "UR_PLATFORM_BACKEND_LEVEL_ZERO"; break; - case UR_PLATFORM_BACKEND_OPENCL: os << "UR_PLATFORM_BACKEND_OPENCL"; break; - case UR_PLATFORM_BACKEND_CUDA: os << "UR_PLATFORM_BACKEND_CUDA"; break; - case UR_PLATFORM_BACKEND_HIP: os << "UR_PLATFORM_BACKEND_HIP"; break; - case UR_PLATFORM_BACKEND_NATIVE_CPU: os << 
"UR_PLATFORM_BACKEND_NATIVE_CPU"; break; @@ -2346,8 +1988,11 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_device_binary_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_binary_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_device_binary_t params) { os << "(struct ur_device_binary_t){"; os << ".stype = "; @@ -2357,43 +2002,42 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".pDeviceTargetSpec = "; - ur_params::serializePtr(os, (params.pDeviceTargetSpec)); + ur::details::printPtr(os, + (params.pDeviceTargetSpec)); os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, enum ur_device_type_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_type_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_device_type_t value) { switch (value) { - case UR_DEVICE_TYPE_DEFAULT: os << "UR_DEVICE_TYPE_DEFAULT"; break; - case UR_DEVICE_TYPE_ALL: os << "UR_DEVICE_TYPE_ALL"; break; - case UR_DEVICE_TYPE_GPU: os << "UR_DEVICE_TYPE_GPU"; break; - case UR_DEVICE_TYPE_CPU: os << "UR_DEVICE_TYPE_CPU"; break; - case UR_DEVICE_TYPE_FPGA: os << "UR_DEVICE_TYPE_FPGA"; break; - case UR_DEVICE_TYPE_MCA: os << "UR_DEVICE_TYPE_MCA"; break; - case UR_DEVICE_TYPE_VPU: os << "UR_DEVICE_TYPE_VPU"; break; @@ -2403,533 +2047,405 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_type_t value) { } return os; } -inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_device_info_t value) { switch (value) { - case UR_DEVICE_INFO_TYPE: os << "UR_DEVICE_INFO_TYPE"; break; - case UR_DEVICE_INFO_VENDOR_ID: os << "UR_DEVICE_INFO_VENDOR_ID"; break; - case UR_DEVICE_INFO_DEVICE_ID: os << "UR_DEVICE_INFO_DEVICE_ID"; break; - case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: os << "UR_DEVICE_INFO_MAX_COMPUTE_UNITS"; break; - case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: os << "UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS"; break; - case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: os << "UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES"; break; - case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: os << "UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE"; break; - case UR_DEVICE_INFO_SINGLE_FP_CONFIG: os << "UR_DEVICE_INFO_SINGLE_FP_CONFIG"; break; - case UR_DEVICE_INFO_HALF_FP_CONFIG: os << "UR_DEVICE_INFO_HALF_FP_CONFIG"; break; - case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: os << "UR_DEVICE_INFO_DOUBLE_FP_CONFIG"; break; - case UR_DEVICE_INFO_QUEUE_PROPERTIES: os << "UR_DEVICE_INFO_QUEUE_PROPERTIES"; break; - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: os << "UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR"; break; - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: os << "UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT"; break; - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: os << "UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT"; break; - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: os << "UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG"; break; - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: os << "UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT"; break; - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: os << "UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE"; break; - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: os << "UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF"; break; - case 
UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: os << "UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR"; break; - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: os << "UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT"; break; - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: os << "UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT"; break; - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: os << "UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG"; break; - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: os << "UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT"; break; - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: os << "UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE"; break; - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: os << "UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF"; break; - case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: os << "UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY"; break; - case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: os << "UR_DEVICE_INFO_MEMORY_CLOCK_RATE"; break; - case UR_DEVICE_INFO_ADDRESS_BITS: os << "UR_DEVICE_INFO_ADDRESS_BITS"; break; - case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: os << "UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE"; break; - case UR_DEVICE_INFO_IMAGE_SUPPORTED: os << "UR_DEVICE_INFO_IMAGE_SUPPORTED"; break; - case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: os << "UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS"; break; - case UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: os << "UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS"; break; - case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS: os << "UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS"; break; - case UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH: os << "UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH"; break; - case UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: os << "UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT"; break; - case UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH: os << "UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH"; break; - case UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: os << "UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT"; break; - case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: os << "UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH"; break; - case UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: os << 
"UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE"; break; - case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: os << "UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE"; break; - case UR_DEVICE_INFO_MAX_SAMPLERS: os << "UR_DEVICE_INFO_MAX_SAMPLERS"; break; - case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: os << "UR_DEVICE_INFO_MAX_PARAMETER_SIZE"; break; - case UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: os << "UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN"; break; - case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: os << "UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE"; break; - case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: os << "UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE"; break; - case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: os << "UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE"; break; - case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: os << "UR_DEVICE_INFO_GLOBAL_MEM_SIZE"; break; - case UR_DEVICE_INFO_GLOBAL_MEM_FREE: os << "UR_DEVICE_INFO_GLOBAL_MEM_FREE"; break; - case UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: os << "UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE"; break; - case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: os << "UR_DEVICE_INFO_MAX_CONSTANT_ARGS"; break; - case UR_DEVICE_INFO_LOCAL_MEM_TYPE: os << "UR_DEVICE_INFO_LOCAL_MEM_TYPE"; break; - case UR_DEVICE_INFO_LOCAL_MEM_SIZE: os << "UR_DEVICE_INFO_LOCAL_MEM_SIZE"; break; - case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: os << "UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT"; break; - case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: os << "UR_DEVICE_INFO_HOST_UNIFIED_MEMORY"; break; - case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: os << "UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION"; break; - case UR_DEVICE_INFO_ENDIAN_LITTLE: os << "UR_DEVICE_INFO_ENDIAN_LITTLE"; break; - case UR_DEVICE_INFO_AVAILABLE: os << "UR_DEVICE_INFO_AVAILABLE"; break; - case UR_DEVICE_INFO_COMPILER_AVAILABLE: os << "UR_DEVICE_INFO_COMPILER_AVAILABLE"; break; - case UR_DEVICE_INFO_LINKER_AVAILABLE: os << "UR_DEVICE_INFO_LINKER_AVAILABLE"; break; - case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: os << "UR_DEVICE_INFO_EXECUTION_CAPABILITIES"; break; - case 
UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: os << "UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES"; break; - case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: os << "UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES"; break; - case UR_DEVICE_INFO_BUILT_IN_KERNELS: os << "UR_DEVICE_INFO_BUILT_IN_KERNELS"; break; - case UR_DEVICE_INFO_PLATFORM: os << "UR_DEVICE_INFO_PLATFORM"; break; - case UR_DEVICE_INFO_REFERENCE_COUNT: os << "UR_DEVICE_INFO_REFERENCE_COUNT"; break; - case UR_DEVICE_INFO_IL_VERSION: os << "UR_DEVICE_INFO_IL_VERSION"; break; - case UR_DEVICE_INFO_NAME: os << "UR_DEVICE_INFO_NAME"; break; - case UR_DEVICE_INFO_VENDOR: os << "UR_DEVICE_INFO_VENDOR"; break; - case UR_DEVICE_INFO_DRIVER_VERSION: os << "UR_DEVICE_INFO_DRIVER_VERSION"; break; - case UR_DEVICE_INFO_PROFILE: os << "UR_DEVICE_INFO_PROFILE"; break; - case UR_DEVICE_INFO_VERSION: os << "UR_DEVICE_INFO_VERSION"; break; - case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: os << "UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION"; break; - case UR_DEVICE_INFO_EXTENSIONS: os << "UR_DEVICE_INFO_EXTENSIONS"; break; - case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: os << "UR_DEVICE_INFO_PRINTF_BUFFER_SIZE"; break; - case UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: os << "UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC"; break; - case UR_DEVICE_INFO_PARENT_DEVICE: os << "UR_DEVICE_INFO_PARENT_DEVICE"; break; - case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: os << "UR_DEVICE_INFO_SUPPORTED_PARTITIONS"; break; - case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: os << "UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES"; break; - case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: os << "UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN"; break; - case UR_DEVICE_INFO_PARTITION_TYPE: os << "UR_DEVICE_INFO_PARTITION_TYPE"; break; - case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: os << "UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS"; break; - case UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: os << "UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS"; break; - case 
UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: os << "UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL"; break; - case UR_DEVICE_INFO_USM_HOST_SUPPORT: os << "UR_DEVICE_INFO_USM_HOST_SUPPORT"; break; - case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: os << "UR_DEVICE_INFO_USM_DEVICE_SUPPORT"; break; - case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: os << "UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT"; break; - case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: os << "UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT"; break; - case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: os << "UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT"; break; - case UR_DEVICE_INFO_UUID: os << "UR_DEVICE_INFO_UUID"; break; - case UR_DEVICE_INFO_PCI_ADDRESS: os << "UR_DEVICE_INFO_PCI_ADDRESS"; break; - case UR_DEVICE_INFO_GPU_EU_COUNT: os << "UR_DEVICE_INFO_GPU_EU_COUNT"; break; - case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH: os << "UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH"; break; - case UR_DEVICE_INFO_GPU_EU_SLICES: os << "UR_DEVICE_INFO_GPU_EU_SLICES"; break; - case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: os << "UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE"; break; - case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: os << "UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE"; break; - case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: os << "UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU"; break; - case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH: os << "UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH"; break; - case UR_DEVICE_INFO_IMAGE_SRGB: os << "UR_DEVICE_INFO_IMAGE_SRGB"; break; - case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: os << "UR_DEVICE_INFO_BUILD_ON_SUBDEVICE"; break; - case UR_DEVICE_INFO_ATOMIC_64: os << "UR_DEVICE_INFO_ATOMIC_64"; break; - case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: os << "UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES"; break; - case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: os << "UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES"; break; - case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: os << "UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES"; break; - case 
UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: os << "UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES"; break; - case UR_DEVICE_INFO_BFLOAT16: os << "UR_DEVICE_INFO_BFLOAT16"; break; - case UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: os << "UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES"; break; - case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS: os << "UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS"; break; - case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: os << "UR_DEVICE_INFO_MEMORY_BUS_WIDTH"; break; - case UR_DEVICE_INFO_MAX_WORK_GROUPS_3D: os << "UR_DEVICE_INFO_MAX_WORK_GROUPS_3D"; break; - case UR_DEVICE_INFO_ASYNC_BARRIER: os << "UR_DEVICE_INFO_ASYNC_BARRIER"; break; - case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: os << "UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT"; break; - case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: os << "UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED"; break; - case UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP: os << "UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP"; break; - case UR_DEVICE_INFO_IP_VERSION: os << "UR_DEVICE_INFO_IP_VERSION"; break; - case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: os << "UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT"; break; - case UR_DEVICE_INFO_ESIMD_SUPPORT: os << "UR_DEVICE_INFO_ESIMD_SUPPORT"; break; - case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: os << "UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP"; break; - case UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP: os << "UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP"; break; - case UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP: os << "UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP"; break; - case UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP: os << "UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP"; break; - case UR_DEVICE_INFO_IMAGE_PITCH_ALIGN_EXP: os << "UR_DEVICE_INFO_IMAGE_PITCH_ALIGN_EXP"; break; - case UR_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH_EXP: os << "UR_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH_EXP"; break; - case 
UR_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT_EXP: os << "UR_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT_EXP"; break; - case UR_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH_EXP: os << "UR_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH_EXP"; break; - case UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP: os << "UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP"; break; - case UR_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT_EXP: os << "UR_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT_EXP"; break; - case UR_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY_EXP: os << "UR_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY_EXP"; break; - case UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP: os << "UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP"; break; - case UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP: os << "UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP"; break; - case UR_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT_EXP: os << "UR_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT_EXP"; break; - case UR_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT_EXP: os << "UR_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT_EXP"; break; - case UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP: os << "UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP"; break; @@ -2939,87 +2455,76 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_device_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_device_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_DEVICE_INFO_TYPE: { const ur_device_type_t *tptr = (const ur_device_type_t *)ptr; if (sizeof(ur_device_type_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_device_type_t) << ")"; - 
return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_type_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_VENDOR_ID: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_DEVICE_ID: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; 
os << ")"; } break; - case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: { const size_t *tptr = (const size_t *)ptr; @@ -3034,925 +2539,792 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } os << "}"; } break; - case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_SINGLE_FP_CONFIG: { - const ur_device_fp_capability_flags_t *tptr = - (const ur_device_fp_capability_flags_t *)ptr; + const ur_device_fp_capability_flags_t *tptr = (const ur_device_fp_capability_flags_t *)ptr; if (sizeof(ur_device_fp_capability_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_device_fp_capability_flags_t) - << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_fp_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_HALF_FP_CONFIG: { - const ur_device_fp_capability_flags_t *tptr = - (const ur_device_fp_capability_flags_t *)ptr; + const ur_device_fp_capability_flags_t *tptr = (const ur_device_fp_capability_flags_t *)ptr; if (sizeof(ur_device_fp_capability_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_device_fp_capability_flags_t) - << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_fp_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << 
" ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: { - const ur_device_fp_capability_flags_t *tptr = - (const ur_device_fp_capability_flags_t *)ptr; + const ur_device_fp_capability_flags_t *tptr = (const ur_device_fp_capability_flags_t *)ptr; if (sizeof(ur_device_fp_capability_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_device_fp_capability_flags_t) - << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_fp_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_QUEUE_PROPERTIES: { const ur_queue_flags_t *tptr = (const ur_queue_flags_t *)ptr; if (sizeof(ur_queue_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_queue_flags_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_queue_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << 
size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return 
UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: { 
const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << 
"invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_ADDRESS_BITS: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: { const uint64_t *tptr = (const uint64_t *)ptr; if (sizeof(uint64_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint64_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint64_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_IMAGE_SUPPORTED: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } 
break; - case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - 
return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case 
UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_SAMPLERS: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: { - const ur_device_mem_cache_type_t *tptr = - (const ur_device_mem_cache_type_t *)ptr; + const ur_device_mem_cache_type_t *tptr = (const ur_device_mem_cache_type_t *)ptr; if 
(sizeof(ur_device_mem_cache_type_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_device_mem_cache_type_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_mem_cache_type_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: { const uint64_t *tptr = (const uint64_t *)ptr; if (sizeof(uint64_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint64_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint64_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: { const uint64_t *tptr = (const uint64_t *)ptr; if (sizeof(uint64_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint64_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint64_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_GLOBAL_MEM_FREE: { const uint64_t *tptr = (const uint64_t *)ptr; if (sizeof(uint64_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint64_t) << ")"; - return; + os << "invalid size (is: " 
<< size << ", expected: >=" << sizeof(uint64_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: { const uint64_t *tptr = (const uint64_t *)ptr; if (sizeof(uint64_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint64_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint64_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_LOCAL_MEM_TYPE: { - const ur_device_local_mem_type_t *tptr = - (const ur_device_local_mem_type_t *)ptr; + const ur_device_local_mem_type_t *tptr = (const ur_device_local_mem_type_t *)ptr; if (sizeof(ur_device_local_mem_type_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_device_local_mem_type_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_local_mem_type_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_LOCAL_MEM_SIZE: { const uint64_t *tptr = (const uint64_t *)ptr; if (sizeof(uint64_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint64_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << 
sizeof(uint64_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_ENDIAN_LITTLE: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case 
UR_DEVICE_INFO_AVAILABLE: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_COMPILER_AVAILABLE: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_LINKER_AVAILABLE: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: { - const ur_device_exec_capability_flags_t *tptr = - (const ur_device_exec_capability_flags_t *)ptr; + const ur_device_exec_capability_flags_t *tptr = (const ur_device_exec_capability_flags_t *)ptr; if (sizeof(ur_device_exec_capability_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_device_exec_capability_flags_t) - << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_exec_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - 
ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: { const ur_queue_flags_t *tptr = (const ur_queue_flags_t *)ptr; if (sizeof(ur_queue_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_queue_flags_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_queue_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: { const ur_queue_flags_t *tptr = (const ur_queue_flags_t *)ptr; if (sizeof(ur_queue_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_queue_flags_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_queue_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_BUILT_IN_KERNELS: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_DEVICE_INFO_PLATFORM: { const ur_platform_handle_t *tptr = (const ur_platform_handle_t *)ptr; if (sizeof(ur_platform_handle_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_platform_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_platform_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_REFERENCE_COUNT: { const uint32_t *tptr = (const uint32_t *)ptr; 
if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_IL_VERSION: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_DEVICE_INFO_NAME: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_DEVICE_INFO_VENDOR: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_DEVICE_INFO_DRIVER_VERSION: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_DEVICE_INFO_PROFILE: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_DEVICE_INFO_VERSION: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_DEVICE_INFO_EXTENSIONS: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size 
(is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_PARENT_DEVICE: { const ur_device_handle_t *tptr = (const ur_device_handle_t *)ptr; if (sizeof(ur_device_handle_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_device_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { const ur_device_partition_t *tptr = (const ur_device_partition_t *)ptr; @@ -3967,41 +3339,34 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } os << "}"; } break; - case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: { - const ur_device_affinity_domain_flags_t *tptr = - (const ur_device_affinity_domain_flags_t *)ptr; + const ur_device_affinity_domain_flags_t *tptr = (const ur_device_affinity_domain_flags_t *)ptr; if (sizeof(ur_device_affinity_domain_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_device_affinity_domain_flags_t) - << ")"; - return; + os << "invalid size (is: " 
<< size << ", expected: >=" << sizeof(ur_device_affinity_domain_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_PARTITION_TYPE: { - const ur_device_partition_property_t *tptr = - (const ur_device_partition_property_t *)ptr; + const ur_device_partition_property_t *tptr = (const ur_device_partition_property_t *)ptr; os << "{"; size_t nelems = size / sizeof(ur_device_partition_property_t); for (size_t i = 0; i < nelems; ++i) { @@ -4013,35 +3378,30 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } os << "}"; } break; - case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { const uint32_t *tptr = (const uint32_t *)ptr; @@ -4056,359 +3416,301 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } os << "}"; } break; - case UR_DEVICE_INFO_USM_HOST_SUPPORT: { - const ur_device_usm_access_capability_flags_t *tptr = - (const 
ur_device_usm_access_capability_flags_t *)ptr; + const ur_device_usm_access_capability_flags_t *tptr = (const ur_device_usm_access_capability_flags_t *)ptr; if (sizeof(ur_device_usm_access_capability_flags_t) > size) { - os << "invalid size (is: " << size << ", expected: >=" - << sizeof(ur_device_usm_access_capability_flags_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_usm_access_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, - *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: { - const ur_device_usm_access_capability_flags_t *tptr = - (const ur_device_usm_access_capability_flags_t *)ptr; + const ur_device_usm_access_capability_flags_t *tptr = (const ur_device_usm_access_capability_flags_t *)ptr; if (sizeof(ur_device_usm_access_capability_flags_t) > size) { - os << "invalid size (is: " << size << ", expected: >=" - << sizeof(ur_device_usm_access_capability_flags_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_usm_access_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, - *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: { - const ur_device_usm_access_capability_flags_t *tptr = - (const ur_device_usm_access_capability_flags_t *)ptr; + const ur_device_usm_access_capability_flags_t *tptr = (const ur_device_usm_access_capability_flags_t *)ptr; if (sizeof(ur_device_usm_access_capability_flags_t) > size) { - os << "invalid size (is: " << size << ", expected: >=" - << sizeof(ur_device_usm_access_capability_flags_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << 
sizeof(ur_device_usm_access_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, - *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: { - const ur_device_usm_access_capability_flags_t *tptr = - (const ur_device_usm_access_capability_flags_t *)ptr; + const ur_device_usm_access_capability_flags_t *tptr = (const ur_device_usm_access_capability_flags_t *)ptr; if (sizeof(ur_device_usm_access_capability_flags_t) > size) { - os << "invalid size (is: " << size << ", expected: >=" - << sizeof(ur_device_usm_access_capability_flags_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_usm_access_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, - *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: { - const ur_device_usm_access_capability_flags_t *tptr = - (const ur_device_usm_access_capability_flags_t *)ptr; + const ur_device_usm_access_capability_flags_t *tptr = (const ur_device_usm_access_capability_flags_t *)ptr; if (sizeof(ur_device_usm_access_capability_flags_t) > size) { - os << "invalid size (is: " << size << ", expected: >=" - << sizeof(ur_device_usm_access_capability_flags_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_usm_access_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, - *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_UUID: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case 
UR_DEVICE_INFO_PCI_ADDRESS: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_DEVICE_INFO_GPU_EU_COUNT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_GPU_EU_SLICES: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: { const uint32_t *tptr = (const uint32_t 
*)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_IMAGE_SRGB: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << 
sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_ATOMIC_64: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - const ur_memory_order_capability_flags_t *tptr = - (const ur_memory_order_capability_flags_t *)ptr; + const ur_memory_order_capability_flags_t *tptr = (const ur_memory_order_capability_flags_t *)ptr; if (sizeof(ur_memory_order_capability_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_memory_order_capability_flags_t) - << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_memory_order_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { - const ur_memory_scope_capability_flags_t *tptr = - (const ur_memory_scope_capability_flags_t *)ptr; + const ur_memory_scope_capability_flags_t *tptr = (const ur_memory_scope_capability_flags_t *)ptr; if (sizeof(ur_memory_scope_capability_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_memory_scope_capability_flags_t) - << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_memory_scope_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " 
("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { - const ur_memory_order_capability_flags_t *tptr = - (const ur_memory_order_capability_flags_t *)ptr; + const ur_memory_order_capability_flags_t *tptr = (const ur_memory_order_capability_flags_t *)ptr; if (sizeof(ur_memory_order_capability_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_memory_order_capability_flags_t) - << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_memory_order_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { - const ur_memory_scope_capability_flags_t *tptr = - (const ur_memory_scope_capability_flags_t *)ptr; + const ur_memory_scope_capability_flags_t *tptr = (const ur_memory_scope_capability_flags_t *)ptr; if (sizeof(ur_memory_scope_capability_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_memory_scope_capability_flags_t) - << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_memory_scope_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_DEVICE_INFO_BFLOAT16: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; 
} - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_WORK_GROUPS_3D: { const size_t *tptr = (const size_t *)ptr; @@ -4423,323 +3725,277 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } os << "}"; } break; - case UR_DEVICE_INFO_ASYNC_BARRIER: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return 
UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_IP_VERSION: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: { 
const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_ESIMD_SUPPORT: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << 
sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_IMAGE_PITCH_ALIGN_EXP: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH_EXP: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT_EXP: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " 
("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH_EXP: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY_EXP: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if 
(sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - 
return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4747,34 +4003,33 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_affinity_domain_flag_t value) { - switch (value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_affinity_domain_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_device_affinity_domain_flag_t value) { + switch (value) { case UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA: os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA"; break; - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE: os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE"; break; - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE: os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE"; break; - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE: os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE"; break; - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE: os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE"; break; - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE: os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE"; break; @@ -4784,16 +4039,16 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_device_affinity_domain_flag_t flag template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; 
- if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA) == - (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA) { + if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA) == (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA) { val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; if (!first) { os << " | "; @@ -4803,8 +4058,7 @@ inline void serializeFlag(std::ostream &os, os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; } - if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE) == - (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE) { + if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE) == (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE) { val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE; if (!first) { os << " | "; @@ -4814,8 +4068,7 @@ inline void serializeFlag(std::ostream &os, os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE; } - if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE) == - (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE) { + if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE) == (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE) { val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE; if (!first) { os << " | "; @@ -4825,8 +4078,7 @@ inline void serializeFlag(std::ostream &os, os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE; } - if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE) == - (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE) { + if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE) == (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE) { val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE; if (!first) { os << " | "; @@ -4836,8 +4088,7 @@ inline void serializeFlag(std::ostream &os, os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE; } - if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE) == - (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE) { + if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE) == (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE) { val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE; if (!first) { os << " | "; @@ -4847,8 +4098,7 @@ inline 
void serializeFlag(std::ostream &os, os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE; } - if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE) == - (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE) { + if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE) == (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE) { val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE; if (!first) { os << " | "; @@ -4866,24 +4116,24 @@ inline void serializeFlag(std::ostream &os, } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_partition_t value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_partition_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_device_partition_t value) { switch (value) { - case UR_DEVICE_PARTITION_EQUALLY: os << "UR_DEVICE_PARTITION_EQUALLY"; break; - case UR_DEVICE_PARTITION_BY_COUNTS: os << "UR_DEVICE_PARTITION_BY_COUNTS"; break; - case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: os << "UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN"; break; - case UR_DEVICE_PARTITION_BY_CSLICE: os << "UR_DEVICE_PARTITION_BY_CSLICE"; break; @@ -4893,11 +4143,14 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } +namespace ur::details { -inline void -ur_params::serializeUnion(std::ostream &os, - const union ur_device_partition_value_t params, - const enum ur_device_partition_t tag) { +/////////////////////////////////////////////////////////////////////////////// +// @brief Print ur_device_partition_value_t union +inline ur_result_t printUnion( + std::ostream &os, + const union ur_device_partition_value_t params, + const enum ur_device_partition_t tag) { os << "(union ur_device_partition_value_t){"; switch (tag) { @@ -4919,19 +4172,23 @@ 
ur_params::serializeUnion(std::ostream &os, os << ".affinity_domain = "; - ur_params::serializeFlag( - os, (params.affinity_domain)); + ur::details::printFlag(os, + (params.affinity_domain)); break; default: os << ""; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } os << "}"; -} -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_device_partition_property_t params) { + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_partition_property_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_device_partition_property_t params) { os << "(struct ur_device_partition_property_t){"; os << ".type = "; @@ -4940,14 +4197,16 @@ operator<<(std::ostream &os, os << ", "; os << ".value = "; - ur_params::serializeUnion(os, (params.value), params.type); + ur::details::printUnion(os, (params.value), params.type); os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_device_partition_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_partition_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_device_partition_properties_t params) { os << "(struct ur_device_partition_properties_t){"; os << ".stype = "; @@ -4957,12 +4216,14 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".pProperties = "; - ur_params::serializePtr(os, (params.pProperties)); + ur::details::printPtr(os, + (params.pProperties)); os << ", "; os << ".PropCount = "; @@ -4972,38 +4233,33 @@ operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream 
&operator<<(std::ostream &os, - enum ur_device_fp_capability_flag_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_fp_capability_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_device_fp_capability_flag_t value) { switch (value) { - case UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT: os << "UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT"; break; - case UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST: os << "UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST"; break; - case UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO: os << "UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO"; break; - case UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF: os << "UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF"; break; - case UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN: os << "UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN"; break; - case UR_DEVICE_FP_CAPABILITY_FLAG_DENORM: os << "UR_DEVICE_FP_CAPABILITY_FLAG_DENORM"; break; - case UR_DEVICE_FP_CAPABILITY_FLAG_FMA: os << "UR_DEVICE_FP_CAPABILITY_FLAG_FMA"; break; - case UR_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT: os << "UR_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT"; break; @@ -5013,18 +4269,17 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_device_fp_capability_flag_t flag template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; - if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT) == - (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT) { - val ^= (uint32_t) - UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; + if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 
(uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT) { + val ^= (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; if (!first) { os << " | "; } else { @@ -5033,8 +4288,7 @@ inline void serializeFlag(std::ostream &os, os << UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; } - if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST) == - (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST) { + if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST) == (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST) { val ^= (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST; if (!first) { os << " | "; @@ -5044,8 +4298,7 @@ inline void serializeFlag(std::ostream &os, os << UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST; } - if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO) == - (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO) { + if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO) == (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO) { val ^= (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO; if (!first) { os << " | "; @@ -5055,8 +4308,7 @@ inline void serializeFlag(std::ostream &os, os << UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO; } - if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF) == - (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF) { + if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF) == (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF) { val ^= (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF; if (!first) { os << " | "; @@ -5066,8 +4318,7 @@ inline void serializeFlag(std::ostream &os, os << UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF; } - if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN) == - (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN) { + if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN) == (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN) { val ^= (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN; if (!first) { os << " | "; @@ -5077,8 +4328,7 @@ inline void 
serializeFlag(std::ostream &os, os << UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN; } - if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_DENORM) == - (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_DENORM) { + if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_DENORM) == (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_DENORM) { val ^= (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_DENORM; if (!first) { os << " | "; @@ -5088,8 +4338,7 @@ inline void serializeFlag(std::ostream &os, os << UR_DEVICE_FP_CAPABILITY_FLAG_DENORM; } - if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_FMA) == - (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_FMA) { + if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_FMA) == (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_FMA) { val ^= (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_FMA; if (!first) { os << " | "; @@ -5099,8 +4348,7 @@ inline void serializeFlag(std::ostream &os, os << UR_DEVICE_FP_CAPABILITY_FLAG_FMA; } - if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT) == - (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT) { + if ((val & UR_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT) == (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT) { val ^= (uint32_t)UR_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT; if (!first) { os << " | "; @@ -5118,20 +4366,21 @@ inline void serializeFlag(std::ostream &os, } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_mem_cache_type_t value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_mem_cache_type_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_device_mem_cache_type_t value) { switch (value) { - case UR_DEVICE_MEM_CACHE_TYPE_NONE: os << "UR_DEVICE_MEM_CACHE_TYPE_NONE"; break; - case UR_DEVICE_MEM_CACHE_TYPE_READ_ONLY_CACHE: os << "UR_DEVICE_MEM_CACHE_TYPE_READ_ONLY_CACHE"; break; - case UR_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE: os << 
"UR_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE"; break; @@ -5141,18 +4390,18 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_local_mem_type_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_local_mem_type_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_device_local_mem_type_t value) { switch (value) { - case UR_DEVICE_LOCAL_MEM_TYPE_NONE: os << "UR_DEVICE_LOCAL_MEM_TYPE_NONE"; break; - case UR_DEVICE_LOCAL_MEM_TYPE_LOCAL: os << "UR_DEVICE_LOCAL_MEM_TYPE_LOCAL"; break; - case UR_DEVICE_LOCAL_MEM_TYPE_GLOBAL: os << "UR_DEVICE_LOCAL_MEM_TYPE_GLOBAL"; break; @@ -5162,14 +4411,15 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_exec_capability_flag_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_exec_capability_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_device_exec_capability_flag_t value) { switch (value) { - case UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL: os << "UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL"; break; - case UR_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL: os << "UR_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL"; break; @@ -5179,16 +4429,16 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_device_exec_capability_flag_t flag template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; - if ((val & UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL) == - 
(uint32_t)UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL) { + if ((val & UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL) == (uint32_t)UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL) { val ^= (uint32_t)UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL; if (!first) { os << " | "; @@ -5198,8 +4448,7 @@ inline void serializeFlag(std::ostream &os, os << UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL; } - if ((val & UR_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL) == - (uint32_t)UR_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL) { + if ((val & UR_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL) == (uint32_t)UR_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL) { val ^= (uint32_t)UR_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL; if (!first) { os << " | "; @@ -5217,11 +4466,14 @@ inline void serializeFlag(std::ostream &os, } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_device_native_properties_t params) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_native_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_device_native_properties_t params) { os << "(struct ur_device_native_properties_t){"; os << ".stype = "; @@ -5231,7 +4483,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".isNativeHandleOwned = "; @@ -5241,26 +4494,24 @@ operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_memory_order_capability_flag_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_memory_order_capability_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, 
ur_memory_order_capability_flag_t value) { switch (value) { - case UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED: os << "UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED"; break; - case UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE: os << "UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE"; break; - case UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE: os << "UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE"; break; - case UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL: os << "UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL"; break; - case UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST: os << "UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST"; break; @@ -5270,16 +4521,16 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_memory_order_capability_flag_t flag template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; - if ((val & UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED) == - (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED) { + if ((val & UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED) == (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED) { val ^= (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED; if (!first) { os << " | "; @@ -5289,8 +4540,7 @@ inline void serializeFlag(std::ostream &os, os << UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED; } - if ((val & UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE) == - (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE) { + if ((val & UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE) == (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE) { val ^= (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE; if (!first) { os << " | "; @@ -5300,8 +4550,7 @@ inline void serializeFlag(std::ostream &os, os << UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE; } - if ((val & UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE) == - (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE) { + if 
((val & UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE) == (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE) { val ^= (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE; if (!first) { os << " | "; @@ -5311,8 +4560,7 @@ inline void serializeFlag(std::ostream &os, os << UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE; } - if ((val & UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL) == - (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL) { + if ((val & UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL) == (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL) { val ^= (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; if (!first) { os << " | "; @@ -5322,8 +4570,7 @@ inline void serializeFlag(std::ostream &os, os << UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; } - if ((val & UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST) == - (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST) { + if ((val & UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST) == (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST) { val ^= (uint32_t)UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; if (!first) { os << " | "; @@ -5341,28 +4588,27 @@ inline void serializeFlag(std::ostream &os, } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_memory_scope_capability_flag_t value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_memory_scope_capability_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_memory_scope_capability_flag_t value) { switch (value) { - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM: os << "UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM"; break; - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP: os << "UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP"; break; - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP: os << "UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP"; break; - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE: os << 
"UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE"; break; - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM: os << "UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM"; break; @@ -5372,16 +4618,16 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_memory_scope_capability_flag_t flag template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; - if ((val & UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM) == - (uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM) { + if ((val & UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM) == (uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM) { val ^= (uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM; if (!first) { os << " | "; @@ -5391,8 +4637,7 @@ inline void serializeFlag(std::ostream &os, os << UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM; } - if ((val & UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP) == - (uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP) { + if ((val & UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP) == (uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP) { val ^= (uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP; if (!first) { os << " | "; @@ -5402,8 +4647,7 @@ inline void serializeFlag(std::ostream &os, os << UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP; } - if ((val & UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP) == - (uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP) { + if ((val & UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP) == (uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP) { val ^= (uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; if (!first) { os << " | "; @@ -5413,8 +4657,7 @@ inline void serializeFlag(std::ostream &os, os << UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; } - if ((val & UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE) == - 
(uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE) { + if ((val & UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE) == (uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE) { val ^= (uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; if (!first) { os << " | "; @@ -5424,8 +4667,7 @@ inline void serializeFlag(std::ostream &os, os << UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; } - if ((val & UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM) == - (uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM) { + if ((val & UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM) == (uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM) { val ^= (uint32_t)UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; if (!first) { os << " | "; @@ -5443,25 +4685,24 @@ inline void serializeFlag(std::ostream &os, } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream & -operator<<(std::ostream &os, - enum ur_device_usm_access_capability_flag_t value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_usm_access_capability_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_device_usm_access_capability_flag_t value) { switch (value) { - case UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS: os << "UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS"; break; - case UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS: os << "UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS"; break; - case UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS: os << "UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS"; break; - case UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS: os << "UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS"; break; @@ -5471,17 +4712,16 @@ operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print 
ur_device_usm_access_capability_flag_t flag template <> -inline void -serializeFlag(std::ostream &os, - uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; - if ((val & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS) == - (uint32_t)UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS) { + if ((val & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS) == (uint32_t)UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS) { val ^= (uint32_t)UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS; if (!first) { os << " | "; @@ -5491,8 +4731,7 @@ serializeFlag(std::ostream &os, os << UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS; } - if ((val & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS) == - (uint32_t)UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS) { + if ((val & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS) == (uint32_t)UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS) { val ^= (uint32_t)UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS; if (!first) { os << " | "; @@ -5502,8 +4741,7 @@ serializeFlag(std::ostream &os, os << UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS; } - if ((val & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS) == - (uint32_t)UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS) { + if ((val & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS) == (uint32_t)UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS) { val ^= (uint32_t)UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS; if (!first) { os << " | "; @@ -5513,11 +4751,8 @@ serializeFlag(std::ostream &os, os << UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS; } - if ((val & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS) == - (uint32_t) - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS) { - val ^= (uint32_t) - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS; + if ((val & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS) == 
(uint32_t)UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS) { + val ^= (uint32_t)UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS; if (!first) { os << " | "; } else { @@ -5534,12 +4769,15 @@ serializeFlag(std::ostream &os, } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_context_flag_t value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_context_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_context_flag_t value) { switch (value) { - case UR_CONTEXT_FLAG_TBD: os << "UR_CONTEXT_FLAG_TBD"; break; @@ -5549,10 +4787,12 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_context_flag_t flag template <> -inline void serializeFlag(std::ostream &os, uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; @@ -5574,10 +4814,14 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - const struct ur_context_properties_t params) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_context_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_context_properties_t params) { os << "(struct ur_context_properties_t){"; os << ".stype = "; @@ -5587,52 +4831,48 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, 
(params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".flags = "; - ur_params::serializeFlag(os, (params.flags)); + ur::details::printFlag(os, + (params.flags)); os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_context_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_context_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_context_info_t value) { switch (value) { - case UR_CONTEXT_INFO_NUM_DEVICES: os << "UR_CONTEXT_INFO_NUM_DEVICES"; break; - case UR_CONTEXT_INFO_DEVICES: os << "UR_CONTEXT_INFO_DEVICES"; break; - case UR_CONTEXT_INFO_REFERENCE_COUNT: os << "UR_CONTEXT_INFO_REFERENCE_COUNT"; break; - case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: os << "UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT"; break; - case UR_CONTEXT_INFO_USM_FILL2D_SUPPORT: os << "UR_CONTEXT_INFO_USM_FILL2D_SUPPORT"; break; - case UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: os << "UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES"; break; - case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: os << "UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES"; break; - case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: os << "UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES"; break; - case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: os << "UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES"; break; @@ -5642,31 +4882,28 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_context_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_context_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_context_info_t value, size_t size) { if (ptr == NULL) { - 
serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_CONTEXT_INFO_NUM_DEVICES: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_CONTEXT_INFO_DEVICES: { const ur_device_handle_t *tptr = (const ur_device_handle_t *)ptr; @@ -5677,125 +4914,112 @@ inline void serializeTagged(std::ostream &os, const void *ptr, os << ", "; } - ur_params::serializePtr(os, tptr[i]); + ur::details::printPtr(os, + tptr[i]); } os << "}"; } break; - case UR_CONTEXT_INFO_REFERENCE_COUNT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_CONTEXT_INFO_USM_FILL2D_SUPPORT: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", 
expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - const ur_memory_order_capability_flags_t *tptr = - (const ur_memory_order_capability_flags_t *)ptr; + const ur_memory_order_capability_flags_t *tptr = (const ur_memory_order_capability_flags_t *)ptr; if (sizeof(ur_memory_order_capability_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_memory_order_capability_flags_t) - << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_memory_order_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { - const ur_memory_scope_capability_flags_t *tptr = - (const ur_memory_scope_capability_flags_t *)ptr; + const ur_memory_scope_capability_flags_t *tptr = (const ur_memory_scope_capability_flags_t *)ptr; if (sizeof(ur_memory_scope_capability_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_memory_scope_capability_flags_t) - << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_memory_scope_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { - const ur_memory_order_capability_flags_t *tptr = - (const ur_memory_order_capability_flags_t *)ptr; + const ur_memory_order_capability_flags_t *tptr = (const ur_memory_order_capability_flags_t *)ptr; if 
(sizeof(ur_memory_order_capability_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_memory_order_capability_flags_t) - << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_memory_order_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { - const ur_memory_scope_capability_flags_t *tptr = - (const ur_memory_scope_capability_flags_t *)ptr; + const ur_memory_scope_capability_flags_t *tptr = (const ur_memory_scope_capability_flags_t *)ptr; if (sizeof(ur_memory_scope_capability_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_memory_scope_capability_flags_t) - << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_memory_scope_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_context_native_properties_t params) { +} // namespace ur::details + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_context_native_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_context_native_properties_t params) { os << "(struct ur_context_native_properties_t){"; os << ".stype = "; @@ -5805,7 +5029,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - 
ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".isNativeHandleOwned = "; @@ -5815,29 +5040,27 @@ operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, enum ur_mem_flag_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_mem_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_mem_flag_t value) { switch (value) { - case UR_MEM_FLAG_READ_WRITE: os << "UR_MEM_FLAG_READ_WRITE"; break; - case UR_MEM_FLAG_WRITE_ONLY: os << "UR_MEM_FLAG_WRITE_ONLY"; break; - case UR_MEM_FLAG_READ_ONLY: os << "UR_MEM_FLAG_READ_ONLY"; break; - case UR_MEM_FLAG_USE_HOST_POINTER: os << "UR_MEM_FLAG_USE_HOST_POINTER"; break; - case UR_MEM_FLAG_ALLOC_HOST_POINTER: os << "UR_MEM_FLAG_ALLOC_HOST_POINTER"; break; - case UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER: os << "UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER"; break; @@ -5847,10 +5070,12 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_mem_flag_t value) { } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_mem_flag_t flag template <> -inline void serializeFlag(std::ostream &os, uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; @@ -5884,8 +5109,7 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { os << UR_MEM_FLAG_READ_ONLY; } - if ((val & UR_MEM_FLAG_USE_HOST_POINTER) == - (uint32_t)UR_MEM_FLAG_USE_HOST_POINTER) { + if ((val & UR_MEM_FLAG_USE_HOST_POINTER) == (uint32_t)UR_MEM_FLAG_USE_HOST_POINTER) { val ^= (uint32_t)UR_MEM_FLAG_USE_HOST_POINTER; if (!first) { os << " | "; @@ -5895,8 +5119,7 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { os << UR_MEM_FLAG_USE_HOST_POINTER; } - if ((val & 
UR_MEM_FLAG_ALLOC_HOST_POINTER) == - (uint32_t)UR_MEM_FLAG_ALLOC_HOST_POINTER) { + if ((val & UR_MEM_FLAG_ALLOC_HOST_POINTER) == (uint32_t)UR_MEM_FLAG_ALLOC_HOST_POINTER) { val ^= (uint32_t)UR_MEM_FLAG_ALLOC_HOST_POINTER; if (!first) { os << " | "; @@ -5906,8 +5129,7 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { os << UR_MEM_FLAG_ALLOC_HOST_POINTER; } - if ((val & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) == - (uint32_t)UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) { + if ((val & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) == (uint32_t)UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) { val ^= (uint32_t)UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; if (!first) { os << " | "; @@ -5925,35 +5147,33 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, enum ur_mem_type_t value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_mem_type_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_mem_type_t value) { switch (value) { - case UR_MEM_TYPE_BUFFER: os << "UR_MEM_TYPE_BUFFER"; break; - case UR_MEM_TYPE_IMAGE2D: os << "UR_MEM_TYPE_IMAGE2D"; break; - case UR_MEM_TYPE_IMAGE3D: os << "UR_MEM_TYPE_IMAGE3D"; break; - case UR_MEM_TYPE_IMAGE2D_ARRAY: os << "UR_MEM_TYPE_IMAGE2D_ARRAY"; break; - case UR_MEM_TYPE_IMAGE1D: os << "UR_MEM_TYPE_IMAGE1D"; break; - case UR_MEM_TYPE_IMAGE1D_ARRAY: os << "UR_MEM_TYPE_IMAGE1D_ARRAY"; break; - case UR_MEM_TYPE_IMAGE1D_BUFFER: os << "UR_MEM_TYPE_IMAGE1D_BUFFER"; break; @@ -5963,13 +5183,15 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_mem_type_t value) { } return os; } -inline std::ostream &operator<<(std::ostream &os, enum ur_mem_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the 
ur_mem_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_mem_info_t value) { switch (value) { - case UR_MEM_INFO_SIZE: os << "UR_MEM_INFO_SIZE"; break; - case UR_MEM_INFO_CONTEXT: os << "UR_MEM_INFO_CONTEXT"; break; @@ -5979,110 +5201,97 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_mem_info_t value) { } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_mem_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_mem_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_mem_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_MEM_INFO_SIZE: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_MEM_INFO_CONTEXT: { const ur_context_handle_t *tptr = (const ur_context_handle_t *)ptr; if (sizeof(ur_context_handle_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params 
-inline std::ostream &operator<<(std::ostream &os, - enum ur_image_channel_order_t value) { - switch (value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_image_channel_order_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_image_channel_order_t value) { + switch (value) { case UR_IMAGE_CHANNEL_ORDER_A: os << "UR_IMAGE_CHANNEL_ORDER_A"; break; - case UR_IMAGE_CHANNEL_ORDER_R: os << "UR_IMAGE_CHANNEL_ORDER_R"; break; - case UR_IMAGE_CHANNEL_ORDER_RG: os << "UR_IMAGE_CHANNEL_ORDER_RG"; break; - case UR_IMAGE_CHANNEL_ORDER_RA: os << "UR_IMAGE_CHANNEL_ORDER_RA"; break; - case UR_IMAGE_CHANNEL_ORDER_RGB: os << "UR_IMAGE_CHANNEL_ORDER_RGB"; break; - case UR_IMAGE_CHANNEL_ORDER_RGBA: os << "UR_IMAGE_CHANNEL_ORDER_RGBA"; break; - case UR_IMAGE_CHANNEL_ORDER_BGRA: os << "UR_IMAGE_CHANNEL_ORDER_BGRA"; break; - case UR_IMAGE_CHANNEL_ORDER_ARGB: os << "UR_IMAGE_CHANNEL_ORDER_ARGB"; break; - case UR_IMAGE_CHANNEL_ORDER_ABGR: os << "UR_IMAGE_CHANNEL_ORDER_ABGR"; break; - case UR_IMAGE_CHANNEL_ORDER_INTENSITY: os << "UR_IMAGE_CHANNEL_ORDER_INTENSITY"; break; - case UR_IMAGE_CHANNEL_ORDER_LUMINANCE: os << "UR_IMAGE_CHANNEL_ORDER_LUMINANCE"; break; - case UR_IMAGE_CHANNEL_ORDER_RX: os << "UR_IMAGE_CHANNEL_ORDER_RX"; break; - case UR_IMAGE_CHANNEL_ORDER_RGX: os << "UR_IMAGE_CHANNEL_ORDER_RGX"; break; - case UR_IMAGE_CHANNEL_ORDER_RGBX: os << "UR_IMAGE_CHANNEL_ORDER_RGBX"; break; - case UR_IMAGE_CHANNEL_ORDER_SRGBA: os << "UR_IMAGE_CHANNEL_ORDER_SRGBA"; break; @@ -6092,66 +5301,54 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_image_channel_type_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_image_channel_type_t type +/// @returns +/// std::ostream & +inline std::ostream 
&operator<<(std::ostream &os, ur_image_channel_type_t value) { switch (value) { - case UR_IMAGE_CHANNEL_TYPE_SNORM_INT8: os << "UR_IMAGE_CHANNEL_TYPE_SNORM_INT8"; break; - case UR_IMAGE_CHANNEL_TYPE_SNORM_INT16: os << "UR_IMAGE_CHANNEL_TYPE_SNORM_INT16"; break; - case UR_IMAGE_CHANNEL_TYPE_UNORM_INT8: os << "UR_IMAGE_CHANNEL_TYPE_UNORM_INT8"; break; - case UR_IMAGE_CHANNEL_TYPE_UNORM_INT16: os << "UR_IMAGE_CHANNEL_TYPE_UNORM_INT16"; break; - case UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565: os << "UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565"; break; - case UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555: os << "UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555"; break; - case UR_IMAGE_CHANNEL_TYPE_INT_101010: os << "UR_IMAGE_CHANNEL_TYPE_INT_101010"; break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8: os << "UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8"; break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16: os << "UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16"; break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32: os << "UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32"; break; - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: os << "UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8"; break; - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: os << "UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16"; break; - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: os << "UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32"; break; - case UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT: os << "UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT"; break; - case UR_IMAGE_CHANNEL_TYPE_FLOAT: os << "UR_IMAGE_CHANNEL_TYPE_FLOAT"; break; @@ -6161,33 +5358,30 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -inline std::ostream &operator<<(std::ostream &os, enum ur_image_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_image_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_image_info_t value) { switch (value) { - case UR_IMAGE_INFO_FORMAT: os << "UR_IMAGE_INFO_FORMAT"; break; - case 
UR_IMAGE_INFO_ELEMENT_SIZE: os << "UR_IMAGE_INFO_ELEMENT_SIZE"; break; - case UR_IMAGE_INFO_ROW_PITCH: os << "UR_IMAGE_INFO_ROW_PITCH"; break; - case UR_IMAGE_INFO_SLICE_PITCH: os << "UR_IMAGE_INFO_SLICE_PITCH"; break; - case UR_IMAGE_INFO_WIDTH: os << "UR_IMAGE_INFO_WIDTH"; break; - case UR_IMAGE_INFO_HEIGHT: os << "UR_IMAGE_INFO_HEIGHT"; break; - case UR_IMAGE_INFO_DEPTH: os << "UR_IMAGE_INFO_DEPTH"; break; @@ -6197,109 +5391,95 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_image_info_t value) { } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_image_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_image_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_image_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_IMAGE_INFO_FORMAT: { const ur_image_format_t *tptr = (const ur_image_format_t *)ptr; if (sizeof(ur_image_format_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_image_format_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_image_format_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_IMAGE_INFO_ELEMENT_SIZE: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_IMAGE_INFO_ROW_PITCH: { const size_t *tptr = (const 
size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_IMAGE_INFO_SLICE_PITCH: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_IMAGE_INFO_WIDTH: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_IMAGE_INFO_HEIGHT: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_IMAGE_INFO_DEPTH: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os 
<< (const void *)(tptr) << " ("; os << *tptr; @@ -6307,12 +5487,17 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - const struct ur_image_format_t params) { +} // namespace ur::details + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_image_format_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_image_format_t params) { os << "(struct ur_image_format_t){"; os << ".channelOrder = "; @@ -6327,8 +5512,11 @@ inline std::ostream &operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_image_desc_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_image_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_image_desc_t params) { os << "(struct ur_image_desc_t){"; os << ".stype = "; @@ -6338,7 +5526,8 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".type = "; @@ -6388,8 +5577,11 @@ inline std::ostream &operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_buffer_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_buffer_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_buffer_properties_t params) { os << "(struct ur_buffer_properties_t){"; os << 
".stype = "; @@ -6399,19 +5591,23 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".pHost = "; - ur_params::serializePtr(os, (params.pHost)); + ur::details::printPtr(os, + (params.pHost)); os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_buffer_channel_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_buffer_channel_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_buffer_channel_properties_t params) { os << "(struct ur_buffer_channel_properties_t){"; os << ".stype = "; @@ -6421,7 +5617,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".channel = "; @@ -6431,9 +5628,11 @@ operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_buffer_alloc_location_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_buffer_alloc_location_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_buffer_alloc_location_properties_t params) { os << "(struct ur_buffer_alloc_location_properties_t){"; os << ".stype = "; @@ -6443,7 +5642,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".location = "; @@ -6453,8 +5653,11 @@ operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct 
ur_buffer_region_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_buffer_region_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_buffer_region_t params) { os << "(struct ur_buffer_region_t){"; os << ".stype = "; @@ -6464,7 +5667,8 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".origin = "; @@ -6479,10 +5683,12 @@ inline std::ostream &operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_buffer_create_type_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_buffer_create_type_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_buffer_create_type_t value) { switch (value) { - case UR_BUFFER_CREATE_TYPE_REGION: os << "UR_BUFFER_CREATE_TYPE_REGION"; break; @@ -6492,8 +5698,11 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_mem_native_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_mem_native_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_mem_native_properties_t params) { os << "(struct ur_mem_native_properties_t){"; os << ".stype = "; @@ -6503,7 +5712,8 @@ operator<<(std::ostream &os, const struct ur_mem_native_properties_t params) { os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".isNativeHandleOwned = "; @@ -6513,14 +5723,15 @@ 
operator<<(std::ostream &os, const struct ur_mem_native_properties_t params) { os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_sampler_filter_mode_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_sampler_filter_mode_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_sampler_filter_mode_t value) { switch (value) { - case UR_SAMPLER_FILTER_MODE_NEAREST: os << "UR_SAMPLER_FILTER_MODE_NEAREST"; break; - case UR_SAMPLER_FILTER_MODE_LINEAR: os << "UR_SAMPLER_FILTER_MODE_LINEAR"; break; @@ -6530,26 +5741,24 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_sampler_addressing_mode_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_sampler_addressing_mode_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_sampler_addressing_mode_t value) { switch (value) { - case UR_SAMPLER_ADDRESSING_MODE_NONE: os << "UR_SAMPLER_ADDRESSING_MODE_NONE"; break; - case UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE: os << "UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE"; break; - case UR_SAMPLER_ADDRESSING_MODE_CLAMP: os << "UR_SAMPLER_ADDRESSING_MODE_CLAMP"; break; - case UR_SAMPLER_ADDRESSING_MODE_REPEAT: os << "UR_SAMPLER_ADDRESSING_MODE_REPEAT"; break; - case UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT: os << "UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT"; break; @@ -6559,26 +5768,24 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_sampler_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_sampler_info_t type +/// @returns +/// std::ostream & +inline std::ostream 
&operator<<(std::ostream &os, ur_sampler_info_t value) { switch (value) { - case UR_SAMPLER_INFO_REFERENCE_COUNT: os << "UR_SAMPLER_INFO_REFERENCE_COUNT"; break; - case UR_SAMPLER_INFO_CONTEXT: os << "UR_SAMPLER_INFO_CONTEXT"; break; - case UR_SAMPLER_INFO_NORMALIZED_COORDS: os << "UR_SAMPLER_INFO_NORMALIZED_COORDS"; break; - case UR_SAMPLER_INFO_ADDRESSING_MODE: os << "UR_SAMPLER_INFO_ADDRESSING_MODE"; break; - case UR_SAMPLER_INFO_FILTER_MODE: os << "UR_SAMPLER_INFO_FILTER_MODE"; break; @@ -6588,84 +5795,72 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_sampler_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_sampler_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_sampler_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_SAMPLER_INFO_REFERENCE_COUNT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_SAMPLER_INFO_CONTEXT: { const ur_context_handle_t *tptr = (const ur_context_handle_t *)ptr; if (sizeof(ur_context_handle_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - 
ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; - case UR_SAMPLER_INFO_NORMALIZED_COORDS: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_SAMPLER_INFO_ADDRESSING_MODE: { - const ur_sampler_addressing_mode_t *tptr = - (const ur_sampler_addressing_mode_t *)ptr; + const ur_sampler_addressing_mode_t *tptr = (const ur_sampler_addressing_mode_t *)ptr; if (sizeof(ur_sampler_addressing_mode_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_sampler_addressing_mode_t) - << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_sampler_addressing_mode_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_SAMPLER_INFO_FILTER_MODE: { - const ur_sampler_filter_mode_t *tptr = - (const ur_sampler_filter_mode_t *)ptr; + const ur_sampler_filter_mode_t *tptr = (const ur_sampler_filter_mode_t *)ptr; if (sizeof(ur_sampler_filter_mode_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_sampler_filter_mode_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_sampler_filter_mode_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -6673,12 +5868,17 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // 
namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - const struct ur_sampler_desc_t params) { +} // namespace ur::details + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_sampler_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_sampler_desc_t params) { os << "(struct ur_sampler_desc_t){"; os << ".stype = "; @@ -6688,7 +5888,8 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".normalizedCoords = "; @@ -6708,9 +5909,11 @@ inline std::ostream &operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_sampler_native_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_sampler_native_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_sampler_native_properties_t params) { os << "(struct ur_sampler_native_properties_t){"; os << ".stype = "; @@ -6720,7 +5923,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".isNativeHandleOwned = "; @@ -6730,10 +5934,12 @@ operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_usm_host_mem_flag_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_host_mem_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_usm_host_mem_flag_t value) { switch (value) { - case UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT: os 
<< "UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT"; break; @@ -6743,16 +5949,16 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_host_mem_flag_t flag template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; - if ((val & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) == - (uint32_t)UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { + if ((val & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) == (uint32_t)UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { val ^= (uint32_t)UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT; if (!first) { os << " | "; @@ -6770,20 +5976,21 @@ inline void serializeFlag(std::ostream &os, } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_usm_device_mem_flag_t value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_device_mem_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_usm_device_mem_flag_t value) { switch (value) { - case UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED: os << "UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED"; break; - case UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT: os << "UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT"; break; - case UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY: os << "UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY"; break; @@ -6793,16 +6000,16 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_device_mem_flag_t flag template <> -inline void serializeFlag(std::ostream &os, - 
uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; - if ((val & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) == - (uint32_t)UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { + if ((val & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) == (uint32_t)UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { val ^= (uint32_t)UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED; if (!first) { os << " | "; @@ -6812,8 +6019,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED; } - if ((val & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) == - (uint32_t)UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { + if ((val & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) == (uint32_t)UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { val ^= (uint32_t)UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT; if (!first) { os << " | "; @@ -6823,8 +6029,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT; } - if ((val & UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY) == - (uint32_t)UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY) { + if ((val & UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY) == (uint32_t)UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY) { val ^= (uint32_t)UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY; if (!first) { os << " | "; @@ -6842,12 +6047,15 @@ inline void serializeFlag(std::ostream &os, } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_usm_pool_flag_t value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_usm_pool_flag_t value) { switch (value) { - case UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK: os << "UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK"; break; @@ -6857,15 +6065,16 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } 
-namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_pool_flag_t flag template <> -inline void serializeFlag(std::ostream &os, uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; - if ((val & UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK) == - (uint32_t)UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK) { + if ((val & UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK) == (uint32_t)UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK) { val ^= (uint32_t)UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK; if (!first) { os << " | "; @@ -6883,23 +6092,24 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, enum ur_usm_type_t value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_type_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_usm_type_t value) { switch (value) { - case UR_USM_TYPE_UNKNOWN: os << "UR_USM_TYPE_UNKNOWN"; break; - case UR_USM_TYPE_HOST: os << "UR_USM_TYPE_HOST"; break; - case UR_USM_TYPE_DEVICE: os << "UR_USM_TYPE_DEVICE"; break; - case UR_USM_TYPE_SHARED: os << "UR_USM_TYPE_SHARED"; break; @@ -6909,26 +6119,24 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_usm_type_t value) { } return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_usm_alloc_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_alloc_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_usm_alloc_info_t value) { switch (value) { - case UR_USM_ALLOC_INFO_TYPE: os << "UR_USM_ALLOC_INFO_TYPE"; break; - case 
UR_USM_ALLOC_INFO_BASE_PTR: os << "UR_USM_ALLOC_INFO_BASE_PTR"; break; - case UR_USM_ALLOC_INFO_SIZE: os << "UR_USM_ALLOC_INFO_SIZE"; break; - case UR_USM_ALLOC_INFO_DEVICE: os << "UR_USM_ALLOC_INFO_DEVICE"; break; - case UR_USM_ALLOC_INFO_POOL: os << "UR_USM_ALLOC_INFO_POOL"; break; @@ -6938,152 +6146,134 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_alloc_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_usm_alloc_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_usm_alloc_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_USM_ALLOC_INFO_TYPE: { const ur_usm_type_t *tptr = (const ur_usm_type_t *)ptr; if (sizeof(ur_usm_type_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_usm_type_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_usm_type_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_USM_ALLOC_INFO_BASE_PTR: { - const void **tptr = (const void **)ptr; + const void *const *tptr = (const void *const *)ptr; if (sizeof(void *) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(void *) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(void *) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_USM_ALLOC_INFO_SIZE: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << 
sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_USM_ALLOC_INFO_DEVICE: { const ur_device_handle_t *tptr = (const ur_device_handle_t *)ptr; if (sizeof(ur_device_handle_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_device_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; - case UR_USM_ALLOC_INFO_POOL: { const ur_usm_pool_handle_t *tptr = (const ur_usm_pool_handle_t *)ptr; if (sizeof(ur_usm_pool_handle_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_usm_pool_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_usm_pool_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_usm_advice_flag_t value) { - switch (value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_advice_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_usm_advice_flag_t value) { + switch (value) { case UR_USM_ADVICE_FLAG_DEFAULT: os << "UR_USM_ADVICE_FLAG_DEFAULT"; 
break; - case UR_USM_ADVICE_FLAG_SET_READ_MOSTLY: os << "UR_USM_ADVICE_FLAG_SET_READ_MOSTLY"; break; - case UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY: os << "UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY"; break; - case UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION: os << "UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION"; break; - case UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION: os << "UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION"; break; - case UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY: os << "UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY"; break; - case UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY: os << "UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY"; break; - case UR_USM_ADVICE_FLAG_BIAS_CACHED: os << "UR_USM_ADVICE_FLAG_BIAS_CACHED"; break; - case UR_USM_ADVICE_FLAG_BIAS_UNCACHED: os << "UR_USM_ADVICE_FLAG_BIAS_UNCACHED"; break; - case UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE: os << "UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE"; break; - case UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE: os << "UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE"; break; - case UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST: os << "UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST"; break; - case UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST: os << "UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST"; break; - case UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST: os << "UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST"; break; - case UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST: os << "UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST"; break; @@ -7093,16 +6283,16 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_advice_flag_t flag template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; - if ((val & UR_USM_ADVICE_FLAG_DEFAULT) == - 
(uint32_t)UR_USM_ADVICE_FLAG_DEFAULT) { + if ((val & UR_USM_ADVICE_FLAG_DEFAULT) == (uint32_t)UR_USM_ADVICE_FLAG_DEFAULT) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_DEFAULT; if (!first) { os << " | "; @@ -7112,8 +6302,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_ADVICE_FLAG_DEFAULT; } - if ((val & UR_USM_ADVICE_FLAG_SET_READ_MOSTLY) == - (uint32_t)UR_USM_ADVICE_FLAG_SET_READ_MOSTLY) { + if ((val & UR_USM_ADVICE_FLAG_SET_READ_MOSTLY) == (uint32_t)UR_USM_ADVICE_FLAG_SET_READ_MOSTLY) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_SET_READ_MOSTLY; if (!first) { os << " | "; @@ -7123,8 +6312,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_ADVICE_FLAG_SET_READ_MOSTLY; } - if ((val & UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY) == - (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY) { + if ((val & UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY) == (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY; if (!first) { os << " | "; @@ -7134,8 +6322,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY; } - if ((val & UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION) == - (uint32_t)UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION) { + if ((val & UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION) == (uint32_t)UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION; if (!first) { os << " | "; @@ -7145,8 +6332,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION; } - if ((val & UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION) == - (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION) { + if ((val & UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION) == (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION; if (!first) { os << " | "; @@ -7156,8 +6342,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION; } - if ((val & 
UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY) == - (uint32_t)UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY) { + if ((val & UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY) == (uint32_t)UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY; if (!first) { os << " | "; @@ -7167,8 +6352,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY; } - if ((val & UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY) == - (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY) { + if ((val & UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY) == (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY; if (!first) { os << " | "; @@ -7178,8 +6362,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY; } - if ((val & UR_USM_ADVICE_FLAG_BIAS_CACHED) == - (uint32_t)UR_USM_ADVICE_FLAG_BIAS_CACHED) { + if ((val & UR_USM_ADVICE_FLAG_BIAS_CACHED) == (uint32_t)UR_USM_ADVICE_FLAG_BIAS_CACHED) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_BIAS_CACHED; if (!first) { os << " | "; @@ -7189,8 +6372,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_ADVICE_FLAG_BIAS_CACHED; } - if ((val & UR_USM_ADVICE_FLAG_BIAS_UNCACHED) == - (uint32_t)UR_USM_ADVICE_FLAG_BIAS_UNCACHED) { + if ((val & UR_USM_ADVICE_FLAG_BIAS_UNCACHED) == (uint32_t)UR_USM_ADVICE_FLAG_BIAS_UNCACHED) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_BIAS_UNCACHED; if (!first) { os << " | "; @@ -7200,8 +6382,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_ADVICE_FLAG_BIAS_UNCACHED; } - if ((val & UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE) == - (uint32_t)UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE) { + if ((val & UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE) == (uint32_t)UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE; if (!first) { os << " | "; @@ -7211,8 +6392,7 @@ inline void serializeFlag(std::ostream &os, os << 
UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE; } - if ((val & UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE) == - (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE) { + if ((val & UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE) == (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE; if (!first) { os << " | "; @@ -7222,8 +6402,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE; } - if ((val & UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST) == - (uint32_t)UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST) { + if ((val & UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST) == (uint32_t)UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST; if (!first) { os << " | "; @@ -7233,8 +6412,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST; } - if ((val & UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST) == - (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST) { + if ((val & UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST) == (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST; if (!first) { os << " | "; @@ -7244,8 +6422,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST; } - if ((val & UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST) == - (uint32_t)UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST) { + if ((val & UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST) == (uint32_t)UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST; if (!first) { os << " | "; @@ -7255,8 +6432,7 @@ inline void serializeFlag(std::ostream &os, os << UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST; } - if ((val & UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST) == - (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST) { + if ((val & 
UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST) == (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST) { val ^= (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST; if (!first) { os << " | "; @@ -7274,10 +6450,14 @@ inline void serializeFlag(std::ostream &os, } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_desc_t params) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_usm_desc_t params) { os << "(struct ur_usm_desc_t){"; os << ".stype = "; @@ -7287,12 +6467,14 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".hints = "; - ur_params::serializeFlag(os, (params.hints)); + ur::details::printFlag(os, + (params.hints)); os << ", "; os << ".align = "; @@ -7302,8 +6484,11 @@ inline std::ostream &operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_host_desc_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_host_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_usm_host_desc_t params) { os << "(struct ur_usm_host_desc_t){"; os << ".stype = "; @@ -7313,18 +6498,23 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".flags = "; - ur_params::serializeFlag(os, (params.flags)); + ur::details::printFlag(os, + 
(params.flags)); os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_device_desc_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_device_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_usm_device_desc_t params) { os << "(struct ur_usm_device_desc_t){"; os << ".stype = "; @@ -7334,18 +6524,23 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".flags = "; - ur_params::serializeFlag(os, (params.flags)); + ur::details::printFlag(os, + (params.flags)); os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_pool_desc_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_usm_pool_desc_t params) { os << "(struct ur_usm_pool_desc_t){"; os << ".stype = "; @@ -7355,18 +6550,23 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".flags = "; - ur_params::serializeFlag(os, (params.flags)); + ur::details::printFlag(os, + (params.flags)); os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_pool_limits_desc_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_limits_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_usm_pool_limits_desc_t params) { os << "(struct 
ur_usm_pool_limits_desc_t){"; os << ".stype = "; @@ -7376,7 +6576,8 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".maxPoolableSize = "; @@ -7391,14 +6592,15 @@ inline std::ostream &operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_usm_pool_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_usm_pool_info_t value) { switch (value) { - case UR_USM_POOL_INFO_REFERENCE_COUNT: os << "UR_USM_POOL_INFO_REFERENCE_COUNT"; break; - case UR_USM_POOL_INFO_CONTEXT: os << "UR_USM_POOL_INFO_CONTEXT"; break; @@ -7408,58 +6610,58 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_pool_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_usm_pool_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_usm_pool_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_USM_POOL_INFO_REFERENCE_COUNT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_USM_POOL_INFO_CONTEXT: { const 
ur_context_handle_t *tptr = (const ur_context_handle_t *)ptr; if (sizeof(ur_context_handle_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_virtual_mem_granularity_info_t value) { - switch (value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_virtual_mem_granularity_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_virtual_mem_granularity_info_t value) { + switch (value) { case UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM: os << "UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM"; break; - case UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED: os << "UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED"; break; @@ -7469,40 +6671,35 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_virtual_mem_granularity_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_virtual_mem_granularity_info_t value, - size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_virtual_mem_granularity_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case 
UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -7510,22 +6707,24 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_virtual_mem_access_flag_t value) { - switch (value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_virtual_mem_access_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_virtual_mem_access_flag_t value) { + switch (value) { case UR_VIRTUAL_MEM_ACCESS_FLAG_NONE: os << "UR_VIRTUAL_MEM_ACCESS_FLAG_NONE"; break; - case UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE: os << "UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE"; break; - case UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY: os << "UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY"; break; @@ -7535,16 +6734,16 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_virtual_mem_access_flag_t flag template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; - if ((val & UR_VIRTUAL_MEM_ACCESS_FLAG_NONE) == - (uint32_t)UR_VIRTUAL_MEM_ACCESS_FLAG_NONE) { + if ((val & UR_VIRTUAL_MEM_ACCESS_FLAG_NONE) == (uint32_t)UR_VIRTUAL_MEM_ACCESS_FLAG_NONE) { val ^= (uint32_t)UR_VIRTUAL_MEM_ACCESS_FLAG_NONE; if (!first) { os << " | "; @@ -7554,8 +6753,7 @@ inline void serializeFlag(std::ostream &os, os << UR_VIRTUAL_MEM_ACCESS_FLAG_NONE; } - if ((val & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) == - (uint32_t)UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) { + if ((val & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) == (uint32_t)UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) { val ^= (uint32_t)UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; if (!first) { os << " | "; @@ -7565,8 +6763,7 @@ inline void serializeFlag(std::ostream &os, os << UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; } - if ((val & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY) == - (uint32_t)UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY) { + if ((val & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY) == (uint32_t)UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY) { val ^= (uint32_t)UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; if (!first) { os << " | "; @@ -7584,12 +6781,15 @@ inline void serializeFlag(std::ostream &os, } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_virtual_mem_info_t value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_virtual_mem_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_virtual_mem_info_t value) { switch (value) { - case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: os << 
"UR_VIRTUAL_MEM_INFO_ACCESS_MODE"; break; @@ -7599,42 +6799,43 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_virtual_mem_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_virtual_mem_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_virtual_mem_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: { - const ur_virtual_mem_access_flags_t *tptr = - (const ur_virtual_mem_access_flags_t *)ptr; + const ur_virtual_mem_access_flags_t *tptr = (const ur_virtual_mem_access_flags_t *)ptr; if (sizeof(ur_virtual_mem_access_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_virtual_mem_access_flags_t) - << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_virtual_mem_access_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_physical_mem_flag_t value) { - switch (value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_physical_mem_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_physical_mem_flag_t value) { + switch (value) { case UR_PHYSICAL_MEM_FLAG_TBD: os << "UR_PHYSICAL_MEM_FLAG_TBD"; break; @@ 
-7644,16 +6845,16 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_physical_mem_flag_t flag template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; - if ((val & UR_PHYSICAL_MEM_FLAG_TBD) == - (uint32_t)UR_PHYSICAL_MEM_FLAG_TBD) { + if ((val & UR_PHYSICAL_MEM_FLAG_TBD) == (uint32_t)UR_PHYSICAL_MEM_FLAG_TBD) { val ^= (uint32_t)UR_PHYSICAL_MEM_FLAG_TBD; if (!first) { os << " | "; @@ -7671,10 +6872,14 @@ inline void serializeFlag(std::ostream &os, } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream & -operator<<(std::ostream &os, const struct ur_physical_mem_properties_t params) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_physical_mem_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_physical_mem_properties_t params) { os << "(struct ur_physical_mem_properties_t){"; os << ".stype = "; @@ -7684,32 +6889,33 @@ operator<<(std::ostream &os, const struct ur_physical_mem_properties_t params) { os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".flags = "; - ur_params::serializeFlag(os, (params.flags)); + ur::details::printFlag(os, + (params.flags)); os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_program_metadata_type_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_metadata_type_t type +/// @returns +/// std::ostream & +inline 
std::ostream &operator<<(std::ostream &os, ur_program_metadata_type_t value) { switch (value) { - case UR_PROGRAM_METADATA_TYPE_UINT32: os << "UR_PROGRAM_METADATA_TYPE_UINT32"; break; - case UR_PROGRAM_METADATA_TYPE_UINT64: os << "UR_PROGRAM_METADATA_TYPE_UINT64"; break; - case UR_PROGRAM_METADATA_TYPE_BYTE_ARRAY: os << "UR_PROGRAM_METADATA_TYPE_BYTE_ARRAY"; break; - case UR_PROGRAM_METADATA_TYPE_STRING: os << "UR_PROGRAM_METADATA_TYPE_STRING"; break; @@ -7719,11 +6925,14 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } +namespace ur::details { -inline void -ur_params::serializeUnion(std::ostream &os, - const union ur_program_metadata_value_t params, - const enum ur_program_metadata_type_t tag) { +/////////////////////////////////////////////////////////////////////////////// +// @brief Print ur_program_metadata_value_t union +inline ur_result_t printUnion( + std::ostream &os, + const union ur_program_metadata_value_t params, + const enum ur_program_metadata_type_t tag) { os << "(union ur_program_metadata_value_t){"; switch (tag) { @@ -7745,29 +6954,37 @@ ur_params::serializeUnion(std::ostream &os, os << ".pString = "; - ur_params::serializePtr(os, (params.pString)); + ur::details::printPtr(os, + (params.pString)); break; case UR_PROGRAM_METADATA_TYPE_BYTE_ARRAY: os << ".pData = "; - ur_params::serializePtr(os, (params.pData)); + ur::details::printPtr(os, + (params.pData)); break; default: os << ""; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } os << "}"; -} -inline std::ostream &operator<<(std::ostream &os, - const struct ur_program_metadata_t params) { + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_metadata_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_program_metadata_t params) { os << "(struct ur_program_metadata_t){"; os << ".pName = 
"; - ur_params::serializePtr(os, (params.pName)); + ur::details::printPtr(os, + (params.pName)); os << ", "; os << ".type = "; @@ -7781,13 +6998,16 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".value = "; - ur_params::serializeUnion(os, (params.value), params.type); + ur::details::printUnion(os, (params.value), params.type); os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_program_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_program_properties_t params) { os << "(struct ur_program_properties_t){"; os << ".stype = "; @@ -7797,7 +7017,8 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".count = "; @@ -7818,42 +7039,36 @@ inline std::ostream &operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_program_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_program_info_t value) { switch (value) { - case UR_PROGRAM_INFO_REFERENCE_COUNT: os << "UR_PROGRAM_INFO_REFERENCE_COUNT"; break; - case UR_PROGRAM_INFO_CONTEXT: os << "UR_PROGRAM_INFO_CONTEXT"; break; - case UR_PROGRAM_INFO_NUM_DEVICES: os << "UR_PROGRAM_INFO_NUM_DEVICES"; break; - case UR_PROGRAM_INFO_DEVICES: os << "UR_PROGRAM_INFO_DEVICES"; break; - case UR_PROGRAM_INFO_SOURCE: os << "UR_PROGRAM_INFO_SOURCE"; break; - case UR_PROGRAM_INFO_BINARY_SIZES: os << "UR_PROGRAM_INFO_BINARY_SIZES"; break; - case UR_PROGRAM_INFO_BINARIES: 
os << "UR_PROGRAM_INFO_BINARIES"; break; - case UR_PROGRAM_INFO_NUM_KERNELS: os << "UR_PROGRAM_INFO_NUM_KERNELS"; break; - case UR_PROGRAM_INFO_KERNEL_NAMES: os << "UR_PROGRAM_INFO_KERNEL_NAMES"; break; @@ -7863,59 +7078,53 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_program_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_program_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_program_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_PROGRAM_INFO_REFERENCE_COUNT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_PROGRAM_INFO_CONTEXT: { const ur_context_handle_t *tptr = (const ur_context_handle_t *)ptr; if (sizeof(ur_context_handle_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; - case UR_PROGRAM_INFO_NUM_DEVICES: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - 
return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_PROGRAM_INFO_DEVICES: { const ur_device_handle_t *tptr = (const ur_device_handle_t *)ptr; @@ -7926,17 +7135,16 @@ inline void serializeTagged(std::ostream &os, const void *ptr, os << ", "; } - ur_params::serializePtr(os, tptr[i]); + ur::details::printPtr(os, + tptr[i]); } os << "}"; } break; - case UR_PROGRAM_INFO_SOURCE: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_PROGRAM_INFO_BINARY_SIZES: { const size_t *tptr = (const size_t *)ptr; @@ -7951,54 +7159,51 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } os << "}"; } break; - case UR_PROGRAM_INFO_BINARIES: { const unsigned char *tptr = (const unsigned char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_PROGRAM_INFO_NUM_KERNELS: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_PROGRAM_INFO_KERNEL_NAMES: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_program_build_status_t value) { - switch (value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the 
ur_program_build_status_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_program_build_status_t value) { + switch (value) { case UR_PROGRAM_BUILD_STATUS_NONE: os << "UR_PROGRAM_BUILD_STATUS_NONE"; break; - case UR_PROGRAM_BUILD_STATUS_ERROR: os << "UR_PROGRAM_BUILD_STATUS_ERROR"; break; - case UR_PROGRAM_BUILD_STATUS_SUCCESS: os << "UR_PROGRAM_BUILD_STATUS_SUCCESS"; break; - case UR_PROGRAM_BUILD_STATUS_IN_PROGRESS: os << "UR_PROGRAM_BUILD_STATUS_IN_PROGRESS"; break; @@ -8008,22 +7213,21 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_program_binary_type_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_binary_type_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_program_binary_type_t value) { switch (value) { - case UR_PROGRAM_BINARY_TYPE_NONE: os << "UR_PROGRAM_BINARY_TYPE_NONE"; break; - case UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT: os << "UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT"; break; - case UR_PROGRAM_BINARY_TYPE_LIBRARY: os << "UR_PROGRAM_BINARY_TYPE_LIBRARY"; break; - case UR_PROGRAM_BINARY_TYPE_EXECUTABLE: os << "UR_PROGRAM_BINARY_TYPE_EXECUTABLE"; break; @@ -8033,22 +7237,21 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_program_build_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_build_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_program_build_info_t value) { switch (value) { - case UR_PROGRAM_BUILD_INFO_STATUS: os << "UR_PROGRAM_BUILD_INFO_STATUS"; break; - case UR_PROGRAM_BUILD_INFO_OPTIONS: os << "UR_PROGRAM_BUILD_INFO_OPTIONS"; break; - case 
UR_PROGRAM_BUILD_INFO_LOG: os << "UR_PROGRAM_BUILD_INFO_LOG"; break; - case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: os << "UR_PROGRAM_BUILD_INFO_BINARY_TYPE"; break; @@ -8058,53 +7261,45 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_program_build_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_program_build_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_program_build_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_PROGRAM_BUILD_INFO_STATUS: { - const ur_program_build_status_t *tptr = - (const ur_program_build_status_t *)ptr; + const ur_program_build_status_t *tptr = (const ur_program_build_status_t *)ptr; if (sizeof(ur_program_build_status_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_program_build_status_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_program_build_status_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_PROGRAM_BUILD_INFO_OPTIONS: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_PROGRAM_BUILD_INFO_LOG: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: { - const ur_program_binary_type_t *tptr = - (const ur_program_binary_type_t *)ptr; + const ur_program_binary_type_t *tptr = (const ur_program_binary_type_t *)ptr; if (sizeof(ur_program_binary_type_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_program_binary_type_t) << 
")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_program_binary_type_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8112,13 +7307,17 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_specialization_constant_info_t params) { +} // namespace ur::details + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_specialization_constant_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_specialization_constant_info_t params) { os << "(struct ur_specialization_constant_info_t){"; os << ".id = "; @@ -8133,14 +7332,17 @@ operator<<(std::ostream &os, os << ", "; os << ".pValue = "; - ur_params::serializePtr(os, (params.pValue)); + ur::details::printPtr(os, + (params.pValue)); os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_native_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_native_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_program_native_properties_t params) { os << "(struct ur_program_native_properties_t){"; os << ".stype = "; @@ -8150,7 +7352,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".isNativeHandleOwned = "; @@ -8160,9 +7363,11 @@ operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream & 
-operator<<(std::ostream &os, - const struct ur_kernel_arg_value_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_arg_value_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_kernel_arg_value_properties_t params) { os << "(struct ur_kernel_arg_value_properties_t){"; os << ".stype = "; @@ -8172,14 +7377,17 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_arg_local_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_arg_local_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_kernel_arg_local_properties_t params) { os << "(struct ur_kernel_arg_local_properties_t){"; os << ".stype = "; @@ -8189,38 +7397,36 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, enum ur_kernel_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_kernel_info_t value) { switch (value) { - case UR_KERNEL_INFO_FUNCTION_NAME: os << "UR_KERNEL_INFO_FUNCTION_NAME"; break; - case UR_KERNEL_INFO_NUM_ARGS: os << "UR_KERNEL_INFO_NUM_ARGS"; break; - case UR_KERNEL_INFO_REFERENCE_COUNT: os << "UR_KERNEL_INFO_REFERENCE_COUNT"; break; - case UR_KERNEL_INFO_CONTEXT: os << "UR_KERNEL_INFO_CONTEXT"; 
break; - case UR_KERNEL_INFO_PROGRAM: os << "UR_KERNEL_INFO_PROGRAM"; break; - case UR_KERNEL_INFO_ATTRIBUTES: os << "UR_KERNEL_INFO_ATTRIBUTES"; break; - case UR_KERNEL_INFO_NUM_REGS: os << "UR_KERNEL_INFO_NUM_REGS"; break; @@ -8230,93 +7436,83 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_kernel_info_t value) { } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_kernel_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_kernel_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_kernel_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_KERNEL_INFO_FUNCTION_NAME: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_KERNEL_INFO_NUM_ARGS: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_KERNEL_INFO_REFERENCE_COUNT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_KERNEL_INFO_CONTEXT: { const ur_context_handle_t *tptr = (const ur_context_handle_t *)ptr; if (sizeof(ur_context_handle_t) > size) { - os << 
"invalid size (is: " << size - << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; - case UR_KERNEL_INFO_PROGRAM: { const ur_program_handle_t *tptr = (const ur_program_handle_t *)ptr; if (sizeof(ur_program_handle_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_program_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_program_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; - case UR_KERNEL_INFO_ATTRIBUTES: { const char *tptr = (const char *)ptr; - serializePtr(os, tptr); + printPtr(os, tptr); } break; - case UR_KERNEL_INFO_NUM_REGS: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8324,34 +7520,33 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_kernel_group_info_t value) { - switch (value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for 
the ur_kernel_group_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_kernel_group_info_t value) { + switch (value) { case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: os << "UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE"; break; - case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: os << "UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE"; break; - case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: os << "UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE"; break; - case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: os << "UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE"; break; - case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: os << "UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE"; break; - case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: os << "UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE"; break; @@ -8361,17 +7556,16 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_kernel_group_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_kernel_group_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_kernel_group_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: { const size_t *tptr = (const size_t *)ptr; @@ -8386,21 +7580,18 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } os << "}"; } break; - case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const 
void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: { const size_t *tptr = (const size_t *)ptr; @@ -8415,43 +7606,37 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } os << "}"; } break; - case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: { const size_t *tptr = (const size_t *)ptr; if (sizeof(size_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(size_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8459,26 +7644,27 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_kernel_sub_group_info_t value) { - switch (value) { +} // namespace ur::details 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_sub_group_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_kernel_sub_group_info_t value) { + switch (value) { case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: os << "UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE"; break; - case UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS: os << "UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS"; break; - case UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS: os << "UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS"; break; - case UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL: os << "UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL"; break; @@ -8488,67 +7674,59 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_kernel_sub_group_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_kernel_sub_group_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_kernel_sub_group_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", 
expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8556,22 +7734,24 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_kernel_cache_config_t value) { - switch (value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_cache_config_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_kernel_cache_config_t value) { + switch (value) { case UR_KERNEL_CACHE_CONFIG_DEFAULT: os << "UR_KERNEL_CACHE_CONFIG_DEFAULT"; break; - case UR_KERNEL_CACHE_CONFIG_LARGE_SLM: os << 
"UR_KERNEL_CACHE_CONFIG_LARGE_SLM"; break; - case UR_KERNEL_CACHE_CONFIG_LARGE_DATA: os << "UR_KERNEL_CACHE_CONFIG_LARGE_DATA"; break; @@ -8581,18 +7761,18 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_kernel_exec_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_exec_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_kernel_exec_info_t value) { switch (value) { - case UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS: os << "UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS"; break; - case UR_KERNEL_EXEC_INFO_USM_PTRS: os << "UR_KERNEL_EXEC_INFO_USM_PTRS"; break; - case UR_KERNEL_EXEC_INFO_CACHE_CONFIG: os << "UR_KERNEL_EXEC_INFO_CACHE_CONFIG"; break; @@ -8602,34 +7782,31 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_kernel_exec_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_kernel_exec_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_kernel_exec_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_KERNEL_EXEC_INFO_USM_PTRS: { - const void **tptr = (const void 
**)ptr; + const void *const *tptr = (const void *const *)ptr; os << "{"; size_t nelems = size / sizeof(void *); for (size_t i = 0; i < nelems; ++i) { @@ -8641,16 +7818,13 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } os << "}"; } break; - case UR_KERNEL_EXEC_INFO_CACHE_CONFIG: { - const ur_kernel_cache_config_t *tptr = - (const ur_kernel_cache_config_t *)ptr; + const ur_kernel_cache_config_t *tptr = (const ur_kernel_cache_config_t *)ptr; if (sizeof(ur_kernel_cache_config_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_kernel_cache_config_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_kernel_cache_config_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8658,13 +7832,17 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_arg_pointer_properties_t params) { +} // namespace ur::details + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_arg_pointer_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_kernel_arg_pointer_properties_t params) { os << "(struct ur_kernel_arg_pointer_properties_t){"; os << ".stype = "; @@ -8674,14 +7852,17 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_exec_info_properties_t params) { 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_exec_info_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_kernel_exec_info_properties_t params) { os << "(struct ur_kernel_exec_info_properties_t){"; os << ".stype = "; @@ -8691,14 +7872,17 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_arg_sampler_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_arg_sampler_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_kernel_arg_sampler_properties_t params) { os << "(struct ur_kernel_arg_sampler_properties_t){"; os << ".stype = "; @@ -8708,14 +7892,17 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_arg_mem_obj_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_arg_mem_obj_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_kernel_arg_mem_obj_properties_t params) { os << "(struct ur_kernel_arg_mem_obj_properties_t){"; os << ".stype = "; @@ -8725,19 +7912,23 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".memoryAccess = "; - 
ur_params::serializeFlag(os, (params.memoryAccess)); + ur::details::printFlag(os, + (params.memoryAccess)); os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_native_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_native_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_kernel_native_properties_t params) { os << "(struct ur_kernel_native_properties_t){"; os << ".stype = "; @@ -8747,7 +7938,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".isNativeHandleOwned = "; @@ -8757,33 +7949,30 @@ operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, enum ur_queue_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_queue_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_queue_info_t value) { switch (value) { - case UR_QUEUE_INFO_CONTEXT: os << "UR_QUEUE_INFO_CONTEXT"; break; - case UR_QUEUE_INFO_DEVICE: os << "UR_QUEUE_INFO_DEVICE"; break; - case UR_QUEUE_INFO_DEVICE_DEFAULT: os << "UR_QUEUE_INFO_DEVICE_DEFAULT"; break; - case UR_QUEUE_INFO_FLAGS: os << "UR_QUEUE_INFO_FLAGS"; break; - case UR_QUEUE_INFO_REFERENCE_COUNT: os << "UR_QUEUE_INFO_REFERENCE_COUNT"; break; - case UR_QUEUE_INFO_SIZE: os << "UR_QUEUE_INFO_SIZE"; break; - case UR_QUEUE_INFO_EMPTY: os << "UR_QUEUE_INFO_EMPTY"; break; @@ -8793,109 +7982,99 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_queue_info_t value) { } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief 
Print ur_queue_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_queue_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_queue_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_QUEUE_INFO_CONTEXT: { const ur_queue_handle_t *tptr = (const ur_queue_handle_t *)ptr; if (sizeof(ur_queue_handle_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_queue_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_queue_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; - case UR_QUEUE_INFO_DEVICE: { const ur_device_handle_t *tptr = (const ur_device_handle_t *)ptr; if (sizeof(ur_device_handle_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_device_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; - case UR_QUEUE_INFO_DEVICE_DEFAULT: { const ur_queue_handle_t *tptr = (const ur_queue_handle_t *)ptr; if (sizeof(ur_queue_handle_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_queue_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_queue_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; - 
case UR_QUEUE_INFO_FLAGS: { const ur_queue_flags_t *tptr = (const ur_queue_flags_t *)ptr; if (sizeof(ur_queue_flags_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_queue_flags_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_queue_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializeFlag(os, *tptr); + ur::details::printFlag(os, + *tptr); os << ")"; } break; - case UR_QUEUE_INFO_REFERENCE_COUNT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_QUEUE_INFO_SIZE: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_QUEUE_INFO_EMPTY: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_bool_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8903,53 +8082,48 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return 
UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, enum ur_queue_flag_t value) { - switch (value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_queue_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_queue_flag_t value) { + switch (value) { case UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE: os << "UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE"; break; - case UR_QUEUE_FLAG_PROFILING_ENABLE: os << "UR_QUEUE_FLAG_PROFILING_ENABLE"; break; - case UR_QUEUE_FLAG_ON_DEVICE: os << "UR_QUEUE_FLAG_ON_DEVICE"; break; - case UR_QUEUE_FLAG_ON_DEVICE_DEFAULT: os << "UR_QUEUE_FLAG_ON_DEVICE_DEFAULT"; break; - case UR_QUEUE_FLAG_DISCARD_EVENTS: os << "UR_QUEUE_FLAG_DISCARD_EVENTS"; break; - case UR_QUEUE_FLAG_PRIORITY_LOW: os << "UR_QUEUE_FLAG_PRIORITY_LOW"; break; - case UR_QUEUE_FLAG_PRIORITY_HIGH: os << "UR_QUEUE_FLAG_PRIORITY_HIGH"; break; - case UR_QUEUE_FLAG_SUBMISSION_BATCHED: os << "UR_QUEUE_FLAG_SUBMISSION_BATCHED"; break; - case UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE: os << "UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE"; break; - case UR_QUEUE_FLAG_USE_DEFAULT_STREAM: os << "UR_QUEUE_FLAG_USE_DEFAULT_STREAM"; break; - case UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM: os << "UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM"; break; @@ -8959,15 +8133,16 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_queue_flag_t value) { } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_queue_flag_t flag template <> -inline void serializeFlag(std::ostream &os, uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; - if ((val & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) == - 
(uint32_t)UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { + if ((val & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) == (uint32_t)UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { val ^= (uint32_t)UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; if (!first) { os << " | "; @@ -8977,8 +8152,7 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { os << UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; } - if ((val & UR_QUEUE_FLAG_PROFILING_ENABLE) == - (uint32_t)UR_QUEUE_FLAG_PROFILING_ENABLE) { + if ((val & UR_QUEUE_FLAG_PROFILING_ENABLE) == (uint32_t)UR_QUEUE_FLAG_PROFILING_ENABLE) { val ^= (uint32_t)UR_QUEUE_FLAG_PROFILING_ENABLE; if (!first) { os << " | "; @@ -8998,8 +8172,7 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { os << UR_QUEUE_FLAG_ON_DEVICE; } - if ((val & UR_QUEUE_FLAG_ON_DEVICE_DEFAULT) == - (uint32_t)UR_QUEUE_FLAG_ON_DEVICE_DEFAULT) { + if ((val & UR_QUEUE_FLAG_ON_DEVICE_DEFAULT) == (uint32_t)UR_QUEUE_FLAG_ON_DEVICE_DEFAULT) { val ^= (uint32_t)UR_QUEUE_FLAG_ON_DEVICE_DEFAULT; if (!first) { os << " | "; @@ -9009,8 +8182,7 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { os << UR_QUEUE_FLAG_ON_DEVICE_DEFAULT; } - if ((val & UR_QUEUE_FLAG_DISCARD_EVENTS) == - (uint32_t)UR_QUEUE_FLAG_DISCARD_EVENTS) { + if ((val & UR_QUEUE_FLAG_DISCARD_EVENTS) == (uint32_t)UR_QUEUE_FLAG_DISCARD_EVENTS) { val ^= (uint32_t)UR_QUEUE_FLAG_DISCARD_EVENTS; if (!first) { os << " | "; @@ -9020,8 +8192,7 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { os << UR_QUEUE_FLAG_DISCARD_EVENTS; } - if ((val & UR_QUEUE_FLAG_PRIORITY_LOW) == - (uint32_t)UR_QUEUE_FLAG_PRIORITY_LOW) { + if ((val & UR_QUEUE_FLAG_PRIORITY_LOW) == (uint32_t)UR_QUEUE_FLAG_PRIORITY_LOW) { val ^= (uint32_t)UR_QUEUE_FLAG_PRIORITY_LOW; if (!first) { os << " | "; @@ -9031,8 +8202,7 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { os << UR_QUEUE_FLAG_PRIORITY_LOW; } - if ((val & UR_QUEUE_FLAG_PRIORITY_HIGH) == - (uint32_t)UR_QUEUE_FLAG_PRIORITY_HIGH) { + if 
((val & UR_QUEUE_FLAG_PRIORITY_HIGH) == (uint32_t)UR_QUEUE_FLAG_PRIORITY_HIGH) { val ^= (uint32_t)UR_QUEUE_FLAG_PRIORITY_HIGH; if (!first) { os << " | "; @@ -9042,8 +8212,7 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { os << UR_QUEUE_FLAG_PRIORITY_HIGH; } - if ((val & UR_QUEUE_FLAG_SUBMISSION_BATCHED) == - (uint32_t)UR_QUEUE_FLAG_SUBMISSION_BATCHED) { + if ((val & UR_QUEUE_FLAG_SUBMISSION_BATCHED) == (uint32_t)UR_QUEUE_FLAG_SUBMISSION_BATCHED) { val ^= (uint32_t)UR_QUEUE_FLAG_SUBMISSION_BATCHED; if (!first) { os << " | "; @@ -9053,8 +8222,7 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { os << UR_QUEUE_FLAG_SUBMISSION_BATCHED; } - if ((val & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE) == - (uint32_t)UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE) { + if ((val & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE) == (uint32_t)UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE) { val ^= (uint32_t)UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE; if (!first) { os << " | "; @@ -9064,8 +8232,7 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { os << UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE; } - if ((val & UR_QUEUE_FLAG_USE_DEFAULT_STREAM) == - (uint32_t)UR_QUEUE_FLAG_USE_DEFAULT_STREAM) { + if ((val & UR_QUEUE_FLAG_USE_DEFAULT_STREAM) == (uint32_t)UR_QUEUE_FLAG_USE_DEFAULT_STREAM) { val ^= (uint32_t)UR_QUEUE_FLAG_USE_DEFAULT_STREAM; if (!first) { os << " | "; @@ -9075,8 +8242,7 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { os << UR_QUEUE_FLAG_USE_DEFAULT_STREAM; } - if ((val & UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM) == - (uint32_t)UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM) { + if ((val & UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM) == (uint32_t)UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM) { val ^= (uint32_t)UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM; if (!first) { os << " | "; @@ -9094,10 +8260,14 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream 
&os, - const struct ur_queue_properties_t params) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_queue_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_queue_properties_t params) { os << "(struct ur_queue_properties_t){"; os << ".stype = "; @@ -9107,18 +8277,23 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".flags = "; - ur_params::serializeFlag(os, (params.flags)); + ur::details::printFlag(os, + (params.flags)); os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_queue_index_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_queue_index_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_queue_index_properties_t params) { os << "(struct ur_queue_index_properties_t){"; os << ".stype = "; @@ -9128,7 +8303,8 @@ operator<<(std::ostream &os, const struct ur_queue_index_properties_t params) { os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".computeIndex = "; @@ -9138,8 +8314,11 @@ operator<<(std::ostream &os, const struct ur_queue_index_properties_t params) { os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_queue_native_desc_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_queue_native_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_queue_native_desc_t params) { os 
<< "(struct ur_queue_native_desc_t){"; os << ".stype = "; @@ -9149,18 +8328,23 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".pNativeData = "; - ur_params::serializePtr(os, (params.pNativeData)); + ur::details::printPtr(os, + (params.pNativeData)); os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_queue_native_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_queue_native_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_queue_native_properties_t params) { os << "(struct ur_queue_native_properties_t){"; os << ".stype = "; @@ -9170,7 +8354,8 @@ operator<<(std::ostream &os, const struct ur_queue_native_properties_t params) { os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".isNativeHandleOwned = "; @@ -9180,117 +8365,93 @@ operator<<(std::ostream &os, const struct ur_queue_native_properties_t params) { os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, enum ur_command_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_command_t value) { switch (value) { - case UR_COMMAND_KERNEL_LAUNCH: os << "UR_COMMAND_KERNEL_LAUNCH"; break; - case UR_COMMAND_EVENTS_WAIT: os << "UR_COMMAND_EVENTS_WAIT"; break; - case UR_COMMAND_EVENTS_WAIT_WITH_BARRIER: os << "UR_COMMAND_EVENTS_WAIT_WITH_BARRIER"; break; - case UR_COMMAND_MEM_BUFFER_READ: os << "UR_COMMAND_MEM_BUFFER_READ"; break; - case UR_COMMAND_MEM_BUFFER_WRITE: os << 
"UR_COMMAND_MEM_BUFFER_WRITE"; break; - case UR_COMMAND_MEM_BUFFER_READ_RECT: os << "UR_COMMAND_MEM_BUFFER_READ_RECT"; break; - case UR_COMMAND_MEM_BUFFER_WRITE_RECT: os << "UR_COMMAND_MEM_BUFFER_WRITE_RECT"; break; - case UR_COMMAND_MEM_BUFFER_COPY: os << "UR_COMMAND_MEM_BUFFER_COPY"; break; - case UR_COMMAND_MEM_BUFFER_COPY_RECT: os << "UR_COMMAND_MEM_BUFFER_COPY_RECT"; break; - case UR_COMMAND_MEM_BUFFER_FILL: os << "UR_COMMAND_MEM_BUFFER_FILL"; break; - case UR_COMMAND_MEM_IMAGE_READ: os << "UR_COMMAND_MEM_IMAGE_READ"; break; - case UR_COMMAND_MEM_IMAGE_WRITE: os << "UR_COMMAND_MEM_IMAGE_WRITE"; break; - case UR_COMMAND_MEM_IMAGE_COPY: os << "UR_COMMAND_MEM_IMAGE_COPY"; break; - case UR_COMMAND_MEM_BUFFER_MAP: os << "UR_COMMAND_MEM_BUFFER_MAP"; break; - case UR_COMMAND_MEM_UNMAP: os << "UR_COMMAND_MEM_UNMAP"; break; - case UR_COMMAND_USM_FILL: os << "UR_COMMAND_USM_FILL"; break; - case UR_COMMAND_USM_MEMCPY: os << "UR_COMMAND_USM_MEMCPY"; break; - case UR_COMMAND_USM_PREFETCH: os << "UR_COMMAND_USM_PREFETCH"; break; - case UR_COMMAND_USM_ADVISE: os << "UR_COMMAND_USM_ADVISE"; break; - case UR_COMMAND_USM_FILL_2D: os << "UR_COMMAND_USM_FILL_2D"; break; - case UR_COMMAND_USM_MEMCPY_2D: os << "UR_COMMAND_USM_MEMCPY_2D"; break; - case UR_COMMAND_DEVICE_GLOBAL_VARIABLE_WRITE: os << "UR_COMMAND_DEVICE_GLOBAL_VARIABLE_WRITE"; break; - case UR_COMMAND_DEVICE_GLOBAL_VARIABLE_READ: os << "UR_COMMAND_DEVICE_GLOBAL_VARIABLE_READ"; break; - case UR_COMMAND_READ_HOST_PIPE: os << "UR_COMMAND_READ_HOST_PIPE"; break; - case UR_COMMAND_WRITE_HOST_PIPE: os << "UR_COMMAND_WRITE_HOST_PIPE"; break; - case UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP: os << "UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP"; break; - case UR_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP: os << "UR_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP"; break; - case UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP: os << "UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP"; break; @@ -9300,22 +8461,21 @@ inline std::ostream &operator<<(std::ostream &os, enum 
ur_command_t value) { } return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_event_status_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_event_status_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_event_status_t value) { switch (value) { - case UR_EVENT_STATUS_COMPLETE: os << "UR_EVENT_STATUS_COMPLETE"; break; - case UR_EVENT_STATUS_RUNNING: os << "UR_EVENT_STATUS_RUNNING"; break; - case UR_EVENT_STATUS_SUBMITTED: os << "UR_EVENT_STATUS_SUBMITTED"; break; - case UR_EVENT_STATUS_QUEUED: os << "UR_EVENT_STATUS_QUEUED"; break; @@ -9325,25 +8485,24 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -inline std::ostream &operator<<(std::ostream &os, enum ur_event_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_event_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_event_info_t value) { switch (value) { - case UR_EVENT_INFO_COMMAND_QUEUE: os << "UR_EVENT_INFO_COMMAND_QUEUE"; break; - case UR_EVENT_INFO_CONTEXT: os << "UR_EVENT_INFO_CONTEXT"; break; - case UR_EVENT_INFO_COMMAND_TYPE: os << "UR_EVENT_INFO_COMMAND_TYPE"; break; - case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: os << "UR_EVENT_INFO_COMMAND_EXECUTION_STATUS"; break; - case UR_EVENT_INFO_REFERENCE_COUNT: os << "UR_EVENT_INFO_REFERENCE_COUNT"; break; @@ -9353,81 +8512,73 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_event_info_t value) { } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_event_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_event_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void 
*ptr, ur_event_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_EVENT_INFO_COMMAND_QUEUE: { const ur_queue_handle_t *tptr = (const ur_queue_handle_t *)ptr; if (sizeof(ur_queue_handle_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_queue_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_queue_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; - case UR_EVENT_INFO_CONTEXT: { const ur_context_handle_t *tptr = (const ur_context_handle_t *)ptr; if (sizeof(ur_context_handle_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; - ur_params::serializePtr(os, *tptr); + ur::details::printPtr(os, + *tptr); os << ")"; } break; - case UR_EVENT_INFO_COMMAND_TYPE: { const ur_command_t *tptr = (const ur_command_t *)ptr; if (sizeof(ur_command_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_command_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_command_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: { const ur_event_status_t *tptr = (const ur_event_status_t *)ptr; if (sizeof(ur_event_status_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(ur_event_status_t) << ")"; - return; + os << "invalid size (is: " << 
size << ", expected: >=" << sizeof(ur_event_status_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_EVENT_INFO_REFERENCE_COUNT: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -9435,30 +8586,30 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_profiling_info_t value) { - switch (value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_profiling_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_profiling_info_t value) { + switch (value) { case UR_PROFILING_INFO_COMMAND_QUEUED: os << "UR_PROFILING_INFO_COMMAND_QUEUED"; break; - case UR_PROFILING_INFO_COMMAND_SUBMIT: os << "UR_PROFILING_INFO_COMMAND_SUBMIT"; break; - case UR_PROFILING_INFO_COMMAND_START: os << "UR_PROFILING_INFO_COMMAND_START"; break; - case UR_PROFILING_INFO_COMMAND_END: os << "UR_PROFILING_INFO_COMMAND_END"; break; - case UR_PROFILING_INFO_COMMAND_COMPLETE: os << "UR_PROFILING_INFO_COMMAND_COMPLETE"; break; @@ -9468,81 +8619,71 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_profiling_info_t enum value 
template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_profiling_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_profiling_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_PROFILING_INFO_COMMAND_QUEUED: { const uint64_t *tptr = (const uint64_t *)ptr; if (sizeof(uint64_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint64_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint64_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_PROFILING_INFO_COMMAND_SUBMIT: { const uint64_t *tptr = (const uint64_t *)ptr; if (sizeof(uint64_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint64_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint64_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_PROFILING_INFO_COMMAND_START: { const uint64_t *tptr = (const uint64_t *)ptr; if (sizeof(uint64_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint64_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint64_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_PROFILING_INFO_COMMAND_END: { const uint64_t *tptr = (const uint64_t *)ptr; if (sizeof(uint64_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint64_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint64_t) << ")"; + return 
UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_PROFILING_INFO_COMMAND_COMPLETE: { const uint64_t *tptr = (const uint64_t *)ptr; if (sizeof(uint64_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint64_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint64_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -9550,12 +8691,17 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream & -operator<<(std::ostream &os, const struct ur_event_native_properties_t params) { +} // namespace ur::details + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_event_native_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_event_native_properties_t params) { os << "(struct ur_event_native_properties_t){"; os << ".stype = "; @@ -9565,7 +8711,8 @@ operator<<(std::ostream &os, const struct ur_event_native_properties_t params) { os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".isNativeHandleOwned = "; @@ -9575,24 +8722,23 @@ operator<<(std::ostream &os, const struct ur_event_native_properties_t params) { os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_execution_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_execution_info_t type +/// @returns +/// std::ostream & +inline std::ostream 
&operator<<(std::ostream &os, ur_execution_info_t value) { switch (value) { - - case UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE: - os << "UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE"; + case UR_EXECUTION_INFO_COMPLETE: + os << "UR_EXECUTION_INFO_COMPLETE"; break; - - case UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING: - os << "UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING"; + case UR_EXECUTION_INFO_RUNNING: + os << "UR_EXECUTION_INFO_RUNNING"; break; - - case UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED: - os << "UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED"; + case UR_EXECUTION_INFO_SUBMITTED: + os << "UR_EXECUTION_INFO_SUBMITTED"; break; - - case UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED: - os << "UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED"; + case UR_EXECUTION_INFO_QUEUED: + os << "UR_EXECUTION_INFO_QUEUED"; break; default: os << "unknown enumerator"; @@ -9600,17 +8746,18 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -inline std::ostream &operator<<(std::ostream &os, enum ur_map_flag_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_map_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_map_flag_t value) { switch (value) { - case UR_MAP_FLAG_READ: os << "UR_MAP_FLAG_READ"; break; - case UR_MAP_FLAG_WRITE: os << "UR_MAP_FLAG_WRITE"; break; - case UR_MAP_FLAG_WRITE_INVALIDATE_REGION: os << "UR_MAP_FLAG_WRITE_INVALIDATE_REGION"; break; @@ -9620,10 +8767,12 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_map_flag_t value) { } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_map_flag_t flag template <> -inline void serializeFlag(std::ostream &os, uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; @@ -9647,8 +8796,7 @@ inline void 
serializeFlag(std::ostream &os, uint32_t flag) { os << UR_MAP_FLAG_WRITE; } - if ((val & UR_MAP_FLAG_WRITE_INVALIDATE_REGION) == - (uint32_t)UR_MAP_FLAG_WRITE_INVALIDATE_REGION) { + if ((val & UR_MAP_FLAG_WRITE_INVALIDATE_REGION) == (uint32_t)UR_MAP_FLAG_WRITE_INVALIDATE_REGION) { val ^= (uint32_t)UR_MAP_FLAG_WRITE_INVALIDATE_REGION; if (!first) { os << " | "; @@ -9666,12 +8814,15 @@ inline void serializeFlag(std::ostream &os, uint32_t flag) { } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_usm_migration_flag_t value) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_migration_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_usm_migration_flag_t value) { switch (value) { - case UR_USM_MIGRATION_FLAG_DEFAULT: os << "UR_USM_MIGRATION_FLAG_DEFAULT"; break; @@ -9681,16 +8832,16 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_migration_flag_t flag template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; - if ((val & UR_USM_MIGRATION_FLAG_DEFAULT) == - (uint32_t)UR_USM_MIGRATION_FLAG_DEFAULT) { + if ((val & UR_USM_MIGRATION_FLAG_DEFAULT) == (uint32_t)UR_USM_MIGRATION_FLAG_DEFAULT) { val ^= (uint32_t)UR_USM_MIGRATION_FLAG_DEFAULT; if (!first) { os << " | "; @@ -9708,20 +8859,21 @@ inline void serializeFlag(std::ostream &os, } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_exp_image_copy_flag_t value) { +} // namespace ur::details 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_image_copy_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_exp_image_copy_flag_t value) { switch (value) { - case UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE: os << "UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE"; break; - case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST: os << "UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST"; break; - case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE: os << "UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE"; break; @@ -9731,16 +8883,16 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_image_copy_flag_t flag template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag) { +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { uint32_t val = flag; bool first = true; - if ((val & UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) == - (uint32_t)UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { + if ((val & UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) == (uint32_t)UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { val ^= (uint32_t)UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE; if (!first) { os << " | "; @@ -9750,8 +8902,7 @@ inline void serializeFlag(std::ostream &os, os << UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE; } - if ((val & UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) == - (uint32_t)UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { + if ((val & UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) == (uint32_t)UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { val ^= (uint32_t)UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST; if (!first) { os << " | "; @@ -9761,8 +8912,7 @@ inline void serializeFlag(std::ostream &os, os << UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST; } - if ((val & UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE) == - (uint32_t)UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE) { + if ((val & 
UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE) == (uint32_t)UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE) { val ^= (uint32_t)UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE; if (!first) { os << " | "; @@ -9780,10 +8930,14 @@ inline void serializeFlag(std::ostream &os, } else if (first) { os << "0"; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - const struct ur_exp_file_descriptor_t params) { +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_file_descriptor_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_file_descriptor_t params) { os << "(struct ur_exp_file_descriptor_t){"; os << ".stype = "; @@ -9793,7 +8947,8 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".fd = "; @@ -9803,8 +8958,11 @@ inline std::ostream &operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_exp_win32_handle_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_win32_handle_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_win32_handle_t params) { os << "(struct ur_exp_win32_handle_t){"; os << ".stype = "; @@ -9814,19 +8972,23 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".handle = "; - ur_params::serializePtr(os, (params.handle)); + ur::details::printPtr(os, + (params.handle)); os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct 
ur_exp_sampler_mip_properties_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_sampler_mip_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_sampler_mip_properties_t params) { os << "(struct ur_exp_sampler_mip_properties_t){"; os << ".stype = "; @@ -9836,7 +8998,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); os << ", "; os << ".minMipmapLevelClamp = "; @@ -9861,8 +9024,42 @@ operator<<(std::ostream &os, os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_exp_interop_mem_desc_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_sampler_addr_modes_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_sampler_addr_modes_t params) { + os << "(struct ur_exp_sampler_addr_modes_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".addrModes = {"; + for (auto i = 0; i < 3; i++) { + if (i != 0) { + os << ", "; + } + + os << (params.addrModes[i]); + } + os << "}"; + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_interop_mem_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_interop_mem_desc_t params) { os << "(struct ur_exp_interop_mem_desc_t){"; os << ".stype = "; @@ -9872,14 +9069,17 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + 
ur::details::printStruct(os, + (params.pNext)); os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_exp_interop_semaphore_desc_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_interop_semaphore_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_interop_semaphore_desc_t params) { os << "(struct ur_exp_interop_semaphore_desc_t){"; os << ".stype = "; @@ -9889,13 +9089,42 @@ operator<<(std::ostream &os, os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + ur::details::printStruct(os, + (params.pNext)); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_layered_image_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_layered_image_properties_t params) { + os << "(struct ur_exp_layered_image_properties_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".numLayers = "; + + os << (params.numLayers); os << "}"; return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_exp_command_buffer_desc_t params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_desc_t params) { os << "(struct ur_exp_command_buffer_desc_t){"; os << ".stype = "; @@ -9905,19 +9134,21 @@ operator<<(std::ostream &os, const struct ur_exp_command_buffer_desc_t params) { os << ", "; os << ".pNext = "; - ur_params::serializeStruct(os, (params.pNext)); + 
ur::details::printStruct(os, + (params.pNext)); os << "}"; return os; } -inline std::ostream &operator<<(std::ostream &os, - enum ur_exp_peer_info_t value) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_peer_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_exp_peer_info_t value) { switch (value) { - case UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED: os << "UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED"; break; - case UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED: os << "UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED"; break; @@ -9927,39 +9158,35 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } -namespace ur_params { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_peer_info_t enum value template <> -inline void serializeTagged(std::ostream &os, const void *ptr, - ur_exp_peer_info_t value, size_t size) { +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_peer_info_t value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { - case UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; os << ")"; } break; - case UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED: { const uint32_t *tptr = (const uint32_t *)ptr; if (sizeof(uint32_t) > size) { - os << "invalid size (is: " << size - << ", expected: >=" << sizeof(uint32_t) << ")"; - return; + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) 
<< ")"; + return UR_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -9967,112 +9194,192 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; default: os << "unknown enumerator"; - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } + return UR_RESULT_SUCCESS; } -} // namespace ur_params +} // namespace ur::details -inline std::ostream &operator<<(std::ostream &os, - const struct ur_init_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_loader_config_create_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_loader_config_create_params_t *params) { - os << ".device_flags = "; + os << ".phLoaderConfig = "; - ur_params::serializeFlag(os, - *(params->pdevice_flags)); + ur::details::printPtr(os, + *(params->pphLoaderConfig)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_loader_config_retain_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_loader_config_retain_params_t *params) { - os << ", "; os << ".hLoaderConfig = "; - ur_params::serializePtr(os, *(params->phLoaderConfig)); + ur::details::printPtr(os, + *(params->phLoaderConfig)); return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_tear_down_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_loader_config_release_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_loader_config_release_params_t *params) { - os << ".pParams = "; + os << ".hLoaderConfig = "; - ur_params::serializePtr(os, 
*(params->ppParams)); + ur::details::printPtr(os, + *(params->phLoaderConfig)); return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_adapter_get_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_loader_config_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_loader_config_get_info_params_t *params) { - os << ".NumEntries = "; + os << ".hLoaderConfig = "; - os << *(params->pNumEntries); + ur::details::printPtr(os, + *(params->phLoaderConfig)); os << ", "; - os << ".phAdapters = {"; - for (size_t i = 0; - *(params->pphAdapters) != NULL && i < *params->pNumEntries; ++i) { - if (i != 0) { - os << ", "; - } + os << ".propName = "; - ur_params::serializePtr(os, (*(params->pphAdapters))[i]); - } - os << "}"; + os << *(params->ppropName); os << ", "; - os << ".pNumAdapters = "; + os << ".propSize = "; + + os << *(params->ppropSize); + + os << ", "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); - ur_params::serializePtr(os, *(params->ppNumAdapters)); + os << ", "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_adapter_release_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_loader_config_enable_layer_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_loader_config_enable_layer_params_t *params) { - os << ".hAdapter = "; + os << ".hLoaderConfig = "; + + ur::details::printPtr(os, + *(params->phLoaderConfig)); + + os << ", "; + os << ".pLayerName = "; - ur_params::serializePtr(os, *(params->phAdapter)); + 
ur::details::printPtr(os, + *(params->ppLayerName)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_adapter_retain_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_loader_config_set_code_location_callback_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_loader_config_set_code_location_callback_params_t *params) { - os << ".hAdapter = "; + os << ".hLoaderConfig = "; + + ur::details::printPtr(os, + *(params->phLoaderConfig)); + + os << ", "; + os << ".pfnCodeloc = "; + + os << reinterpret_cast( + *(params->ppfnCodeloc)); + + os << ", "; + os << ".pUserData = "; - ur_params::serializePtr(os, *(params->phAdapter)); + ur::details::printPtr(os, + *(params->ppUserData)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_adapter_get_last_error_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_platform_get_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_platform_get_params_t *params) { - os << ".hAdapter = "; + os << ".phAdapters = {"; + for (size_t i = 0; *(params->pphAdapters) != NULL && i < *params->pNumAdapters; ++i) { + if (i != 0) { + os << ", "; + } - ur_params::serializePtr(os, *(params->phAdapter)); + ur::details::printPtr(os, + (*(params->pphAdapters))[i]); + } + os << "}"; os << ", "; - os << ".ppMessage = "; + os << ".NumAdapters = "; - ur_params::serializePtr(os, *(params->pppMessage)); + os << *(params->pNumAdapters); os << ", "; - os << ".pError = "; + os << ".NumEntries = "; + + os << *(params->pNumEntries); - ur_params::serializePtr(os, *(params->ppError)); + os << ", "; + os << ".phPlatforms = {"; + for (size_t i = 0; *(params->pphPlatforms) != NULL 
&& i < *params->pNumEntries; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphPlatforms))[i]); + } + os << "}"; + + os << ", "; + os << ".pNumPlatforms = "; + + ur::details::printPtr(os, + *(params->ppNumPlatforms)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_adapter_get_info_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_platform_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_platform_get_info_params_t *params) { - os << ".hAdapter = "; + os << ".hPlatform = "; - ur_params::serializePtr(os, *(params->phAdapter)); + ur::details::printPtr(os, + *(params->phPlatform)); os << ", "; os << ".propName = "; @@ -10086,1296 +9393,2140 @@ operator<<(std::ostream &os, os << ", "; os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); os << ", "; os << ".pPropSizeRet = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t - *params) { - - os << ".hContext = "; - - ur_params::serializePtr(os, *(params->phContext)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_platform_get_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_platform_get_native_handle_params_t *params) { - os << ", "; - os << ".hDevice = "; + os << ".hPlatform = "; - ur_params::serializePtr(os, 
*(params->phDevice)); + ur::details::printPtr(os, + *(params->phPlatform)); os << ", "; - os << ".hImage = "; + os << ".phNativePlatform = "; - ur_params::serializePtr(os, *(params->phImage)); + ur::details::printPtr(os, + *(params->pphNativePlatform)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_sampled_image_handle_destroy_exp_params_t - *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_platform_create_with_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_platform_create_with_native_handle_params_t *params) { - os << ".hContext = "; + os << ".hNativePlatform = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phNativePlatform)); os << ", "; - os << ".hDevice = "; + os << ".pProperties = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; - os << ".hImage = "; + os << ".phPlatform = "; - ur_params::serializePtr(os, *(params->phImage)); + ur::details::printPtr(os, + *(params->pphPlatform)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_image_allocate_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_platform_get_api_version_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_platform_get_api_version_params_t *params) { - os << ".hContext = "; + os << ".hPlatform = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phPlatform)); os << ", "; - os << ".hDevice = "; + os << ".pVersion = "; - ur_params::serializePtr(os, *(params->phDevice)); + 
ur::details::printPtr(os, + *(params->ppVersion)); - os << ", "; - os << ".pImageFormat = "; + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_platform_get_backend_option_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_platform_get_backend_option_params_t *params) { - ur_params::serializePtr(os, *(params->ppImageFormat)); + os << ".hPlatform = "; + + ur::details::printPtr(os, + *(params->phPlatform)); os << ", "; - os << ".pImageDesc = "; + os << ".pFrontendOption = "; - ur_params::serializePtr(os, *(params->ppImageDesc)); + ur::details::printPtr(os, + *(params->ppFrontendOption)); os << ", "; - os << ".phImageMem = "; + os << ".ppPlatformOption = "; - ur_params::serializePtr(os, *(params->pphImageMem)); + ur::details::printPtr(os, + *(params->pppPlatformOption)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_bindless_images_image_free_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_context_create_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_context_create_params_t *params) { - os << ".hContext = "; + os << ".DeviceCount = "; - ur_params::serializePtr(os, *(params->phContext)); + os << *(params->pDeviceCount); os << ", "; - os << ".hDevice = "; + os << ".phDevices = {"; + for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pDeviceCount; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphDevices))[i]); + } + os << "}"; + + os << ", "; + os << ".pProperties = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; - os << ".hImageMem = "; + os << ".phContext = "; - 
ur_params::serializePtr(os, *(params->phImageMem)); + ur::details::printPtr(os, + *(params->pphContext)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_bindless_images_unsampled_image_create_exp_params_t - *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_context_retain_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_context_retain_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); - os << ", "; - os << ".hDevice = "; + return os; +} - ur_params::serializePtr(os, *(params->phDevice)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_context_release_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_context_release_params_t *params) { - os << ", "; - os << ".hImageMem = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phImageMem)); + ur::details::printPtr(os, + *(params->phContext)); - os << ", "; - os << ".pImageFormat = "; + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_context_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_context_get_info_params_t *params) { + + os << ".hContext = "; - ur_params::serializePtr(os, *(params->ppImageFormat)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".pImageDesc = "; + os << ".propName = "; - ur_params::serializePtr(os, *(params->ppImageDesc)); + os << *(params->ppropName); os << ", "; - os << ".phMem = "; + os << ".propSize = "; - ur_params::serializePtr(os, 
*(params->pphMem)); + os << *(params->ppropSize); os << ", "; - os << ".phImage = "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); - ur_params::serializePtr(os, *(params->pphImage)); + os << ", "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_sampled_image_create_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_context_get_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_context_get_native_handle_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".phNativeContext = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->pphNativeContext)); - os << ", "; - os << ".hImageMem = "; + return os; +} - ur_params::serializePtr(os, *(params->phImageMem)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_context_create_with_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_context_create_with_native_handle_params_t *params) { - os << ", "; - os << ".pImageFormat = "; + os << ".hNativeContext = "; - ur_params::serializePtr(os, *(params->ppImageFormat)); + ur::details::printPtr(os, + *(params->phNativeContext)); os << ", "; - os << ".pImageDesc = "; + os << ".numDevices = "; - ur_params::serializePtr(os, *(params->ppImageDesc)); + os << *(params->pnumDevices); os << ", "; - os << ".hSampler = "; + os << ".phDevices = {"; + for 
(size_t i = 0; *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } - ur_params::serializePtr(os, *(params->phSampler)); + ur::details::printPtr(os, + (*(params->pphDevices))[i]); + } + os << "}"; os << ", "; - os << ".phMem = "; + os << ".pProperties = "; - ur_params::serializePtr(os, *(params->pphMem)); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; - os << ".phImage = "; + os << ".phContext = "; - ur_params::serializePtr(os, *(params->pphImage)); + ur::details::printPtr(os, + *(params->pphContext)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_bindless_images_image_copy_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_context_set_extended_deleter_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_context_set_extended_deleter_params_t *params) { - os << ".hQueue = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".pDst = "; + os << ".pfnDeleter = "; - ur_params::serializePtr(os, *(params->ppDst)); + os << reinterpret_cast( + *(params->ppfnDeleter)); os << ", "; - os << ".pSrc = "; + os << ".pUserData = "; - ur_params::serializePtr(os, *(params->ppSrc)); + ur::details::printPtr(os, + *(params->ppUserData)); - os << ", "; - os << ".pImageFormat = "; + return os; +} - ur_params::serializePtr(os, *(params->ppImageFormat)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_event_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_event_get_info_params_t *params) { - os << ", "; - os << ".pImageDesc = "; + os << ".hEvent = "; - 
ur_params::serializePtr(os, *(params->ppImageDesc)); + ur::details::printPtr(os, + *(params->phEvent)); os << ", "; - os << ".imageCopyFlags = "; + os << ".propName = "; - ur_params::serializeFlag( - os, *(params->pimageCopyFlags)); + os << *(params->ppropName); os << ", "; - os << ".srcOffset = "; + os << ".propSize = "; - os << *(params->psrcOffset); + os << *(params->ppropSize); os << ", "; - os << ".dstOffset = "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); - os << *(params->pdstOffset); + os << ", "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_event_get_profiling_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_event_get_profiling_info_params_t *params) { + + os << ".hEvent = "; + + ur::details::printPtr(os, + *(params->phEvent)); os << ", "; - os << ".copyExtent = "; + os << ".propName = "; - os << *(params->pcopyExtent); + os << *(params->ppropName); os << ", "; - os << ".hostExtent = "; + os << ".propSize = "; - os << *(params->phostExtent); + os << *(params->ppropSize); os << ", "; - os << ".numEventsInWaitList = "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); - os << *(params->pnumEventsInWaitList); + os << ", "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_event_wait_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_event_wait_params_t *params) { + + os << ".numEvents = 
"; + + os << *(params->pnumEvents); os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEvents; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; - os << ", "; - os << ".phEvent = "; + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_event_retain_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_event_retain_params_t *params) { + + os << ".hEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->phEvent)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_image_get_info_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_event_release_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_event_release_params_t *params) { - os << ".hImageMem = "; + os << ".hEvent = "; - ur_params::serializePtr(os, *(params->phImageMem)); + ur::details::printPtr(os, + *(params->phEvent)); - os << ", "; - os << ".propName = "; + return os; +} - os << *(params->ppropName); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_event_get_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_event_get_native_handle_params_t *params) { - os << ", "; - os << ".pPropValue = "; + os << ".hEvent = "; - 
ur_params::serializePtr(os, *(params->ppPropValue)); + ur::details::printPtr(os, + *(params->phEvent)); os << ", "; - os << ".pPropSizeRet = "; + os << ".phNativeEvent = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->pphNativeEvent)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_mipmap_get_level_exp_params_t *params) { - - os << ".hContext = "; - - ur_params::serializePtr(os, *(params->phContext)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_event_create_with_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_event_create_with_native_handle_params_t *params) { - os << ", "; - os << ".hDevice = "; + os << ".hNativeEvent = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phNativeEvent)); os << ", "; - os << ".hImageMem = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phImageMem)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".mipmapLevel = "; + os << ".pProperties = "; - os << *(params->pmipmapLevel); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; - os << ".phImageMem = "; + os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphImageMem)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_bindless_images_mipmap_free_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_event_set_callback_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_event_set_callback_params_t *params) { - os << ".hContext = "; + os << ".hEvent = 
"; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phEvent)); os << ", "; - os << ".hDevice = "; + os << ".execStatus = "; - ur_params::serializePtr(os, *(params->phDevice)); + os << *(params->pexecStatus); os << ", "; - os << ".hMem = "; + os << ".pfnNotify = "; + + os << reinterpret_cast( + *(params->ppfnNotify)); - ur_params::serializePtr(os, *(params->phMem)); + os << ", "; + os << ".pUserData = "; + + ur::details::printPtr(os, + *(params->ppUserData)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_import_opaque_fd_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_create_with_il_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_create_with_il_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".pIL = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->ppIL)); os << ", "; - os << ".size = "; + os << ".length = "; - os << *(params->psize); + os << *(params->plength); os << ", "; - os << ".pInteropMemDesc = "; + os << ".pProperties = "; - ur_params::serializePtr(os, *(params->ppInteropMemDesc)); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; - os << ".phInteropMem = "; + os << ".phProgram = "; - ur_params::serializePtr(os, *(params->pphInteropMem)); + ur::details::printPtr(os, + *(params->pphProgram)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_map_external_array_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the 
ur_program_create_with_binary_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_create_with_binary_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".pImageFormat = "; + os << ".size = "; - ur_params::serializePtr(os, *(params->ppImageFormat)); + os << *(params->psize); os << ", "; - os << ".pImageDesc = "; + os << ".pBinary = "; - ur_params::serializePtr(os, *(params->ppImageDesc)); + ur::details::printPtr(os, + *(params->ppBinary)); os << ", "; - os << ".hInteropMem = "; + os << ".pProperties = "; - ur_params::serializePtr(os, *(params->phInteropMem)); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; - os << ".phImageMem = "; + os << ".phProgram = "; - ur_params::serializePtr(os, *(params->pphImageMem)); + ur::details::printPtr(os, + *(params->pphProgram)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_release_interop_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_build_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_build_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".hProgram = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phProgram)); os << ", "; - os << ".hInteropMem = "; + os << ".pOptions = "; - ur_params::serializePtr(os, *(params->phInteropMem)); + 
ur::details::printPtr(os, + *(params->ppOptions)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct - ur_bindless_images_import_external_semaphore_opaque_fd_exp_params_t - *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_build_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_build_exp_params_t *params) { - os << ".hContext = "; + os << ".hProgram = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phProgram)); os << ", "; - os << ".hDevice = "; + os << ".numDevices = "; - ur_params::serializePtr(os, *(params->phDevice)); + os << *(params->pnumDevices); os << ", "; - os << ".pInteropSemaphoreDesc = "; + os << ".phDevices = {"; + for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } - ur_params::serializePtr(os, *(params->ppInteropSemaphoreDesc)); + ur::details::printPtr(os, + (*(params->pphDevices))[i]); + } + os << "}"; os << ", "; - os << ".phInteropSemaphore = "; + os << ".pOptions = "; - ur_params::serializePtr(os, *(params->pphInteropSemaphore)); + ur::details::printPtr(os, + *(params->ppOptions)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_destroy_external_semaphore_exp_params_t - *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_compile_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_compile_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".hProgram = "; - 
ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phProgram)); os << ", "; - os << ".hInteropSemaphore = "; + os << ".pOptions = "; - ur_params::serializePtr(os, *(params->phInteropSemaphore)); + ur::details::printPtr(os, + *(params->ppOptions)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_bindless_images_wait_external_semaphore_exp_params_t - *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_compile_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_compile_exp_params_t *params) { - os << ".hQueue = "; - - ur_params::serializePtr(os, *(params->phQueue)); - - os << ", "; - os << ".hSemaphore = "; + os << ".hProgram = "; - ur_params::serializePtr(os, *(params->phSemaphore)); + ur::details::printPtr(os, + *(params->phProgram)); os << ", "; - os << ".numEventsInWaitList = "; + os << ".numDevices = "; - os << *(params->pnumEventsInWaitList); + os << *(params->pnumDevices); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + os << ".phDevices = {"; + for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphDevices))[i]); } os << "}"; os << ", "; - os << ".phEvent = "; + os << ".pOptions = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->ppOptions)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_signal_external_semaphore_exp_params_t - *params) { - - os << ".hQueue = "; - - ur_params::serializePtr(os, *(params->phQueue)); 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_link_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_link_params_t *params) { - os << ", "; - os << ".hSemaphore = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phSemaphore)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".numEventsInWaitList = "; + os << ".count = "; - os << *(params->pnumEventsInWaitList); + os << *(params->pcount); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + os << ".phPrograms = {"; + for (size_t i = 0; *(params->pphPrograms) != NULL && i < *params->pcount; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphPrograms))[i]); } os << "}"; os << ", "; - os << ".phEvent = "; + os << ".pOptions = "; + + ur::details::printPtr(os, + *(params->ppOptions)); + + os << ", "; + os << ".phProgram = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphProgram)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_command_buffer_create_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_link_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_link_exp_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".numDevices = "; - ur_params::serializePtr(os, *(params->phDevice)); + os << *(params->pnumDevices); 
os << ", "; - os << ".pCommandBufferDesc = "; + os << ".phDevices = {"; + for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } - ur_params::serializePtr(os, *(params->ppCommandBufferDesc)); + ur::details::printPtr(os, + (*(params->pphDevices))[i]); + } + os << "}"; os << ", "; - os << ".phCommandBuffer = "; + os << ".count = "; - ur_params::serializePtr(os, *(params->pphCommandBuffer)); + os << *(params->pcount); - return os; -} + os << ", "; + os << ".phPrograms = {"; + for (size_t i = 0; *(params->pphPrograms) != NULL && i < *params->pcount; ++i) { + if (i != 0) { + os << ", "; + } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_command_buffer_retain_exp_params_t *params) { + ur::details::printPtr(os, + (*(params->pphPrograms))[i]); + } + os << "}"; - os << ".hCommandBuffer = "; + os << ", "; + os << ".pOptions = "; + + ur::details::printPtr(os, + *(params->ppOptions)); + + os << ", "; + os << ".phProgram = "; - ur_params::serializePtr(os, *(params->phCommandBuffer)); + ur::details::printPtr(os, + *(params->pphProgram)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_command_buffer_release_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_retain_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_retain_params_t *params) { - os << ".hCommandBuffer = "; + os << ".hProgram = "; - ur_params::serializePtr(os, *(params->phCommandBuffer)); + ur::details::printPtr(os, + *(params->phProgram)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_command_buffer_finalize_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the 
ur_program_release_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_release_params_t *params) { - os << ".hCommandBuffer = "; + os << ".hProgram = "; - ur_params::serializePtr(os, *(params->phCommandBuffer)); + ur::details::printPtr(os, + *(params->phProgram)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_command_buffer_append_kernel_launch_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_get_function_pointer_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_get_function_pointer_params_t *params) { - os << ".hCommandBuffer = "; + os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phCommandBuffer)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".hKernel = "; + os << ".hProgram = "; - ur_params::serializePtr(os, *(params->phKernel)); + ur::details::printPtr(os, + *(params->phProgram)); os << ", "; - os << ".workDim = "; + os << ".pFunctionName = "; - os << *(params->pworkDim); + ur::details::printPtr(os, + *(params->ppFunctionName)); os << ", "; - os << ".pGlobalWorkOffset = "; + os << ".ppFunctionPointer = "; - ur_params::serializePtr(os, *(params->ppGlobalWorkOffset)); + ur::details::printPtr(os, + *(params->pppFunctionPointer)); - os << ", "; - os << ".pGlobalWorkSize = "; + return os; +} - ur_params::serializePtr(os, *(params->ppGlobalWorkSize)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_get_info_params_t *params) { - os << ", "; - os << ".pLocalWorkSize = "; + os << ".hProgram = 
"; - ur_params::serializePtr(os, *(params->ppLocalWorkSize)); + ur::details::printPtr(os, + *(params->phProgram)); os << ", "; - os << ".numSyncPointsInWaitList = "; + os << ".propName = "; - os << *(params->pnumSyncPointsInWaitList); + os << *(params->ppropName); os << ", "; - os << ".pSyncPointWaitList = "; + os << ".propSize = "; - ur_params::serializePtr(os, *(params->ppSyncPointWaitList)); + os << *(params->ppropSize); os << ", "; - os << ".pSyncPoint = "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); - ur_params::serializePtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_command_buffer_append_memcpy_usm_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_get_build_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_get_build_info_params_t *params) { - os << ".hCommandBuffer = "; + os << ".hProgram = "; - ur_params::serializePtr(os, *(params->phCommandBuffer)); + ur::details::printPtr(os, + *(params->phProgram)); os << ", "; - os << ".pDst = "; + os << ".hDevice = "; - ur_params::serializePtr(os, *(params->ppDst)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".pSrc = "; + os << ".propName = "; - ur_params::serializePtr(os, *(params->ppSrc)); + os << *(params->ppropName); os << ", "; - os << ".size = "; + os << ".propSize = "; - os << *(params->psize); + os << *(params->ppropSize); os << ", "; - os << ".numSyncPointsInWaitList = "; - - os << *(params->pnumSyncPointsInWaitList); + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); 
os << ", "; - os << ".pSyncPointWaitList = "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_set_specialization_constants_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_set_specialization_constants_params_t *params) { - ur_params::serializePtr(os, *(params->ppSyncPointWaitList)); + os << ".hProgram = "; + + ur::details::printPtr(os, + *(params->phProgram)); os << ", "; - os << ".pSyncPoint = "; + os << ".count = "; + + os << *(params->pcount); - ur_params::serializePtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".pSpecConstants = {"; + for (size_t i = 0; *(params->ppSpecConstants) != NULL && i < *params->pcount; ++i) { + if (i != 0) { + os << ", "; + } + + os << (*(params->ppSpecConstants))[i]; + } + os << "}"; return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_command_buffer_append_membuffer_copy_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_get_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_get_native_handle_params_t *params) { - os << ".hCommandBuffer = "; + os << ".hProgram = "; - ur_params::serializePtr(os, *(params->phCommandBuffer)); + ur::details::printPtr(os, + *(params->phProgram)); os << ", "; - os << ".hSrcMem = "; + os << ".phNativeProgram = "; + + ur::details::printPtr(os, + *(params->pphNativeProgram)); + + return os; +} - ur_params::serializePtr(os, *(params->phSrcMem)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the 
ur_program_create_with_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_program_create_with_native_handle_params_t *params) { + + os << ".hNativeProgram = "; + + ur::details::printPtr(os, + *(params->phNativeProgram)); os << ", "; - os << ".hDstMem = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phDstMem)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".srcOffset = "; + os << ".pProperties = "; - os << *(params->psrcOffset); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; - os << ".dstOffset = "; + os << ".phProgram = "; - os << *(params->pdstOffset); + ur::details::printPtr(os, + *(params->pphProgram)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_create_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_create_params_t *params) { + + os << ".hProgram = "; + + ur::details::printPtr(os, + *(params->phProgram)); os << ", "; - os << ".size = "; + os << ".pKernelName = "; - os << *(params->psize); + ur::details::printPtr(os, + *(params->ppKernelName)); os << ", "; - os << ".numSyncPointsInWaitList = "; + os << ".phKernel = "; - os << *(params->pnumSyncPointsInWaitList); + ur::details::printPtr(os, + *(params->pphKernel)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_get_info_params_t *params) { + + os << ".hKernel = "; + + ur::details::printPtr(os, + *(params->phKernel)); os << ", "; - os << ".pSyncPointWaitList = "; + os << ".propName = "; - ur_params::serializePtr(os, 
*(params->ppSyncPointWaitList)); + os << *(params->ppropName); os << ", "; - os << ".pSyncPoint = "; + os << ".propSize = "; + + os << *(params->ppropSize); + + os << ", "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); + + os << ", "; + os << ".pPropSizeRet = "; - ur_params::serializePtr(os, *(params->ppSyncPoint)); + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_command_buffer_append_membuffer_write_exp_params_t - *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_get_group_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_get_group_info_params_t *params) { - os << ".hCommandBuffer = "; + os << ".hKernel = "; - ur_params::serializePtr(os, *(params->phCommandBuffer)); + ur::details::printPtr(os, + *(params->phKernel)); os << ", "; - os << ".hBuffer = "; + os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phBuffer)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".offset = "; + os << ".propName = "; - os << *(params->poffset); + os << *(params->ppropName); os << ", "; - os << ".size = "; + os << ".propSize = "; - os << *(params->psize); + os << *(params->ppropSize); os << ", "; - os << ".pSrc = "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); + + os << ", "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_get_sub_group_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, 
[[maybe_unused]] const struct ur_kernel_get_sub_group_info_params_t *params) { - ur_params::serializePtr(os, *(params->ppSrc)); + os << ".hKernel = "; + + ur::details::printPtr(os, + *(params->phKernel)); os << ", "; - os << ".numSyncPointsInWaitList = "; + os << ".hDevice = "; - os << *(params->pnumSyncPointsInWaitList); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".pSyncPointWaitList = "; + os << ".propName = "; - ur_params::serializePtr(os, *(params->ppSyncPointWaitList)); + os << *(params->ppropName); os << ", "; - os << ".pSyncPoint = "; + os << ".propSize = "; + + os << *(params->ppropSize); + + os << ", "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); - ur_params::serializePtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_command_buffer_append_membuffer_read_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_retain_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_retain_params_t *params) { - os << ".hCommandBuffer = "; + os << ".hKernel = "; - ur_params::serializePtr(os, *(params->phCommandBuffer)); + ur::details::printPtr(os, + *(params->phKernel)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_release_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_release_params_t *params) { + + os << ".hKernel = "; + + ur::details::printPtr(os, + *(params->phKernel)); + + return os; +} + 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_get_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_get_native_handle_params_t *params) { + + os << ".hKernel = "; + + ur::details::printPtr(os, + *(params->phKernel)); os << ", "; - os << ".hBuffer = "; + os << ".phNativeKernel = "; + + ur::details::printPtr(os, + *(params->pphNativeKernel)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_create_with_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_create_with_native_handle_params_t *params) { + + os << ".hNativeKernel = "; + + ur::details::printPtr(os, + *(params->phNativeKernel)); + + os << ", "; + os << ".hContext = "; + + ur::details::printPtr(os, + *(params->phContext)); + + os << ", "; + os << ".hProgram = "; + + ur::details::printPtr(os, + *(params->phProgram)); + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, + *(params->ppProperties)); + + os << ", "; + os << ".phKernel = "; + + ur::details::printPtr(os, + *(params->pphKernel)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_set_arg_value_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_set_arg_value_params_t *params) { + + os << ".hKernel = "; + + ur::details::printPtr(os, + *(params->phKernel)); + + os << ", "; + os << ".argIndex = "; + + os << *(params->pargIndex); + + os << ", "; + os << ".argSize = "; + + os << *(params->pargSize); + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, + 
*(params->ppProperties)); + + os << ", "; + os << ".pArgValue = "; + + ur::details::printPtr(os, + *(params->ppArgValue)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_set_arg_local_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_set_arg_local_params_t *params) { + + os << ".hKernel = "; + + ur::details::printPtr(os, + *(params->phKernel)); + + os << ", "; + os << ".argIndex = "; + + os << *(params->pargIndex); + + os << ", "; + os << ".argSize = "; + + os << *(params->pargSize); + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, + *(params->ppProperties)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_set_arg_pointer_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_set_arg_pointer_params_t *params) { + + os << ".hKernel = "; + + ur::details::printPtr(os, + *(params->phKernel)); + + os << ", "; + os << ".argIndex = "; + + os << *(params->pargIndex); + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, + *(params->ppProperties)); + + os << ", "; + os << ".pArgValue = "; + + ur::details::printPtr(os, + *(params->ppArgValue)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_set_exec_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_set_exec_info_params_t *params) { + + os << ".hKernel = "; + + ur::details::printPtr(os, + *(params->phKernel)); + + os << ", "; + os << ".propName = "; + + os << *(params->ppropName); + + os << ", "; + os << ".propSize = "; + + os << 
*(params->ppropSize); + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, + *(params->ppProperties)); + + os << ", "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_set_arg_sampler_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_set_arg_sampler_params_t *params) { + + os << ".hKernel = "; + + ur::details::printPtr(os, + *(params->phKernel)); + + os << ", "; + os << ".argIndex = "; + + os << *(params->pargIndex); + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, + *(params->ppProperties)); + + os << ", "; + os << ".hArgValue = "; + + ur::details::printPtr(os, + *(params->phArgValue)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_set_arg_mem_obj_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_set_arg_mem_obj_params_t *params) { + + os << ".hKernel = "; + + ur::details::printPtr(os, + *(params->phKernel)); + + os << ", "; + os << ".argIndex = "; + + os << *(params->pargIndex); + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, + *(params->ppProperties)); + + os << ", "; + os << ".hArgValue = "; + + ur::details::printPtr(os, + *(params->phArgValue)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_set_specialization_constants_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_set_specialization_constants_params_t *params) { 
+ + os << ".hKernel = "; + + ur::details::printPtr(os, + *(params->phKernel)); + + os << ", "; + os << ".count = "; + + os << *(params->pcount); + + os << ", "; + os << ".pSpecConstants = "; + + ur::details::printPtr(os, + *(params->ppSpecConstants)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_kernel_suggest_max_cooperative_group_count_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_kernel_suggest_max_cooperative_group_count_exp_params_t *params) { + + os << ".hKernel = "; + + ur::details::printPtr(os, + *(params->phKernel)); + + os << ", "; + os << ".pGroupCountRet = "; + + ur::details::printPtr(os, + *(params->ppGroupCountRet)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_sampler_create_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_sampler_create_params_t *params) { + + os << ".hContext = "; + + ur::details::printPtr(os, + *(params->phContext)); + + os << ", "; + os << ".pDesc = "; + + ur::details::printPtr(os, + *(params->ppDesc)); + + os << ", "; + os << ".phSampler = "; + + ur::details::printPtr(os, + *(params->pphSampler)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_sampler_retain_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_sampler_retain_params_t *params) { + + os << ".hSampler = "; + + ur::details::printPtr(os, + *(params->phSampler)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_sampler_release_params_t type +/// @returns 
+/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_sampler_release_params_t *params) { + + os << ".hSampler = "; + + ur::details::printPtr(os, + *(params->phSampler)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_sampler_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_sampler_get_info_params_t *params) { + + os << ".hSampler = "; + + ur::details::printPtr(os, + *(params->phSampler)); + + os << ", "; + os << ".propName = "; + + os << *(params->ppropName); + + os << ", "; + os << ".propSize = "; + + os << *(params->ppropSize); + + os << ", "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); + + os << ", "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_sampler_get_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_sampler_get_native_handle_params_t *params) { + + os << ".hSampler = "; - ur_params::serializePtr(os, *(params->phBuffer)); + ur::details::printPtr(os, + *(params->phSampler)); os << ", "; - os << ".offset = "; + os << ".phNativeSampler = "; - os << *(params->poffset); + ur::details::printPtr(os, + *(params->pphNativeSampler)); - os << ", "; - os << ".size = "; + return os; +} - os << *(params->psize); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_sampler_create_with_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const 
struct ur_sampler_create_with_native_handle_params_t *params) { - os << ", "; - os << ".pDst = "; + os << ".hNativeSampler = "; - ur_params::serializePtr(os, *(params->ppDst)); + ur::details::printPtr(os, + *(params->phNativeSampler)); os << ", "; - os << ".numSyncPointsInWaitList = "; + os << ".hContext = "; - os << *(params->pnumSyncPointsInWaitList); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".pSyncPointWaitList = "; + os << ".pProperties = "; - ur_params::serializePtr(os, *(params->ppSyncPointWaitList)); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; - os << ".pSyncPoint = "; + os << ".phSampler = "; - ur_params::serializePtr(os, *(params->ppSyncPoint)); + ur::details::printPtr(os, + *(params->pphSampler)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_command_buffer_append_membuffer_copy_rect_exp_params_t - *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_mem_image_create_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_mem_image_create_params_t *params) { - os << ".hCommandBuffer = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phCommandBuffer)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".hSrcMem = "; + os << ".flags = "; - ur_params::serializePtr(os, *(params->phSrcMem)); + ur::details::printFlag(os, + *(params->pflags)); os << ", "; - os << ".hDstMem = "; + os << ".pImageFormat = "; - ur_params::serializePtr(os, *(params->phDstMem)); + ur::details::printPtr(os, + *(params->ppImageFormat)); os << ", "; - os << ".srcOrigin = "; + os << ".pImageDesc = "; - os << *(params->psrcOrigin); + ur::details::printPtr(os, + *(params->ppImageDesc)); os << ", "; - os << ".dstOrigin = "; + os << ".pHost = "; - os << *(params->pdstOrigin); + ur::details::printPtr(os, + 
*(params->ppHost)); os << ", "; - os << ".region = "; + os << ".phMem = "; - os << *(params->pregion); + ur::details::printPtr(os, + *(params->pphMem)); - os << ", "; - os << ".srcRowPitch = "; + return os; +} - os << *(params->psrcRowPitch); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_mem_buffer_create_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_mem_buffer_create_params_t *params) { - os << ", "; - os << ".srcSlicePitch = "; + os << ".hContext = "; - os << *(params->psrcSlicePitch); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".dstRowPitch = "; + os << ".flags = "; - os << *(params->pdstRowPitch); + ur::details::printFlag(os, + *(params->pflags)); os << ", "; - os << ".dstSlicePitch = "; + os << ".size = "; - os << *(params->pdstSlicePitch); + os << *(params->psize); os << ", "; - os << ".numSyncPointsInWaitList = "; + os << ".pProperties = "; - os << *(params->pnumSyncPointsInWaitList); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; - os << ".pSyncPointWaitList = "; + os << ".phBuffer = "; - ur_params::serializePtr(os, *(params->ppSyncPointWaitList)); + ur::details::printPtr(os, + *(params->pphBuffer)); - os << ", "; - os << ".pSyncPoint = "; + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_mem_retain_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_mem_retain_params_t *params) { - ur_params::serializePtr(os, *(params->ppSyncPoint)); + os << ".hMem = "; + + ur::details::printPtr(os, + *(params->phMem)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_command_buffer_append_membuffer_write_rect_exp_params_t - *params) { 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_mem_release_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_mem_release_params_t *params) { - os << ".hCommandBuffer = "; + os << ".hMem = "; - ur_params::serializePtr(os, *(params->phCommandBuffer)); + ur::details::printPtr(os, + *(params->phMem)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_mem_buffer_partition_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_mem_buffer_partition_params_t *params) { - os << ", "; os << ".hBuffer = "; - ur_params::serializePtr(os, *(params->phBuffer)); + ur::details::printPtr(os, + *(params->phBuffer)); os << ", "; - os << ".bufferOffset = "; + os << ".flags = "; - os << *(params->pbufferOffset); + ur::details::printFlag(os, + *(params->pflags)); os << ", "; - os << ".hostOffset = "; + os << ".bufferCreateType = "; - os << *(params->phostOffset); + os << *(params->pbufferCreateType); os << ", "; - os << ".region = "; + os << ".pRegion = "; - os << *(params->pregion); + ur::details::printPtr(os, + *(params->ppRegion)); os << ", "; - os << ".bufferRowPitch = "; + os << ".phMem = "; - os << *(params->pbufferRowPitch); + ur::details::printPtr(os, + *(params->pphMem)); - os << ", "; - os << ".bufferSlicePitch = "; + return os; +} - os << *(params->pbufferSlicePitch); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_mem_get_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_mem_get_native_handle_params_t *params) { - os << ", "; - os << ".hostRowPitch = "; + os << ".hMem = "; - os << 
*(params->phostRowPitch); + ur::details::printPtr(os, + *(params->phMem)); os << ", "; - os << ".hostSlicePitch = "; + os << ".phNativeMem = "; - os << *(params->phostSlicePitch); + ur::details::printPtr(os, + *(params->pphNativeMem)); - os << ", "; - os << ".pSrc = "; + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_mem_buffer_create_with_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_mem_buffer_create_with_native_handle_params_t *params) { - ur_params::serializePtr(os, *(params->ppSrc)); + os << ".hNativeMem = "; + + ur::details::printPtr(os, + *(params->phNativeMem)); os << ", "; - os << ".numSyncPointsInWaitList = "; + os << ".hContext = "; - os << *(params->pnumSyncPointsInWaitList); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".pSyncPointWaitList = "; + os << ".pProperties = "; - ur_params::serializePtr(os, *(params->ppSyncPointWaitList)); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; - os << ".pSyncPoint = "; + os << ".phMem = "; - ur_params::serializePtr(os, *(params->ppSyncPoint)); + ur::details::printPtr(os, + *(params->pphMem)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_command_buffer_append_membuffer_read_rect_exp_params_t - *params) { - - os << ".hCommandBuffer = "; - - ur_params::serializePtr(os, *(params->phCommandBuffer)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_mem_image_create_with_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_mem_image_create_with_native_handle_params_t *params) { - os << ", "; - os << ".hBuffer = "; + os << ".hNativeMem = "; - ur_params::serializePtr(os, *(params->phBuffer)); + 
ur::details::printPtr(os, + *(params->phNativeMem)); os << ", "; - os << ".bufferOffset = "; + os << ".hContext = "; - os << *(params->pbufferOffset); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".hostOffset = "; + os << ".pImageFormat = "; - os << *(params->phostOffset); + ur::details::printPtr(os, + *(params->ppImageFormat)); os << ", "; - os << ".region = "; + os << ".pImageDesc = "; - os << *(params->pregion); + ur::details::printPtr(os, + *(params->ppImageDesc)); os << ", "; - os << ".bufferRowPitch = "; + os << ".pProperties = "; - os << *(params->pbufferRowPitch); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; - os << ".bufferSlicePitch = "; + os << ".phMem = "; - os << *(params->pbufferSlicePitch); + ur::details::printPtr(os, + *(params->pphMem)); - os << ", "; - os << ".hostRowPitch = "; + return os; +} - os << *(params->phostRowPitch); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_mem_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_mem_get_info_params_t *params) { - os << ", "; - os << ".hostSlicePitch = "; + os << ".hMemory = "; - os << *(params->phostSlicePitch); + ur::details::printPtr(os, + *(params->phMemory)); os << ", "; - os << ".pDst = "; + os << ".propName = "; - ur_params::serializePtr(os, *(params->ppDst)); + os << *(params->ppropName); os << ", "; - os << ".numSyncPointsInWaitList = "; + os << ".propSize = "; - os << *(params->pnumSyncPointsInWaitList); + os << *(params->ppropSize); os << ", "; - os << ".pSyncPointWaitList = "; - - ur_params::serializePtr(os, *(params->ppSyncPointWaitList)); + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); os << ", "; - os << ".pSyncPoint = "; + os << ".pPropSizeRet = "; - ur_params::serializePtr(os, 
*(params->ppSyncPoint)); + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_command_buffer_enqueue_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_mem_image_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_mem_image_get_info_params_t *params) { - os << ".hCommandBuffer = "; + os << ".hMemory = "; - ur_params::serializePtr(os, *(params->phCommandBuffer)); + ur::details::printPtr(os, + *(params->phMemory)); os << ", "; - os << ".hQueue = "; + os << ".propName = "; - ur_params::serializePtr(os, *(params->phQueue)); + os << *(params->ppropName); os << ", "; - os << ".numEventsInWaitList = "; + os << ".propSize = "; - os << *(params->pnumEventsInWaitList); + os << *(params->ppropSize); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { - if (i != 0) { - os << ", "; - } - - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); - } - os << "}"; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); os << ", "; - os << ".phEvent = "; + os << ".pPropSizeRet = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_context_create_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_physical_mem_create_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_physical_mem_create_params_t *params) { - os << ".DeviceCount = "; + os << 
".hContext = "; - os << *(params->pDeviceCount); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".phDevices = {"; - for (size_t i = 0; - *(params->pphDevices) != NULL && i < *params->pDeviceCount; ++i) { - if (i != 0) { - os << ", "; - } + os << ".hDevice = "; - ur_params::serializePtr(os, (*(params->pphDevices))[i]); - } - os << "}"; + ur::details::printPtr(os, + *(params->phDevice)); + + os << ", "; + os << ".size = "; + + os << *(params->psize); os << ", "; os << ".pProperties = "; - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; - os << ".phContext = "; + os << ".phPhysicalMem = "; - ur_params::serializePtr(os, *(params->pphContext)); + ur::details::printPtr(os, + *(params->pphPhysicalMem)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_context_retain_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_physical_mem_retain_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_physical_mem_retain_params_t *params) { - os << ".hContext = "; + os << ".hPhysicalMem = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phPhysicalMem)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_context_release_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_physical_mem_release_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_physical_mem_release_params_t *params) { - os << ".hContext = "; + os << ".hPhysicalMem = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phPhysicalMem)); return os; 
} -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_context_get_info_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_adapter_get_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_adapter_get_params_t *params) { - os << ".hContext = "; - - ur_params::serializePtr(os, *(params->phContext)); - - os << ", "; - os << ".propName = "; + os << ".NumEntries = "; - os << *(params->ppropName); + os << *(params->pNumEntries); os << ", "; - os << ".propSize = "; - - os << *(params->ppropSize); + os << ".phAdapters = {"; + for (size_t i = 0; *(params->pphAdapters) != NULL && i < *params->pNumEntries; ++i) { + if (i != 0) { + os << ", "; + } - os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + ur::details::printPtr(os, + (*(params->pphAdapters))[i]); + } + os << "}"; os << ", "; - os << ".pPropSizeRet = "; + os << ".pNumAdapters = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->ppNumAdapters)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_context_get_native_handle_params_t *params) { - - os << ".hContext = "; - - ur_params::serializePtr(os, *(params->phContext)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_adapter_release_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_adapter_release_params_t *params) { - os << ", "; - os << ".phNativeContext = "; + os << ".hAdapter = "; - ur_params::serializePtr(os, *(params->pphNativeContext)); + ur::details::printPtr(os, + *(params->phAdapter)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - 
const struct ur_context_create_with_native_handle_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_adapter_retain_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_adapter_retain_params_t *params) { - os << ".hNativeContext = "; + os << ".hAdapter = "; - ur_params::serializePtr(os, *(params->phNativeContext)); + ur::details::printPtr(os, + *(params->phAdapter)); - os << ", "; - os << ".numDevices = "; + return os; +} - os << *(params->pnumDevices); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_adapter_get_last_error_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_adapter_get_last_error_params_t *params) { - os << ", "; - os << ".phDevices = {"; - for (size_t i = 0; - *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { - if (i != 0) { - os << ", "; - } + os << ".hAdapter = "; - ur_params::serializePtr(os, (*(params->pphDevices))[i]); - } - os << "}"; + ur::details::printPtr(os, + *(params->phAdapter)); os << ", "; - os << ".pProperties = "; + os << ".ppMessage = "; - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + *(params->pppMessage)); os << ", "; - os << ".phContext = "; + os << ".pError = "; - ur_params::serializePtr(os, *(params->pphContext)); + ur::details::printPtr(os, + *(params->ppError)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_context_set_extended_deleter_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_adapter_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct 
ur_adapter_get_info_params_t *params) { - os << ".hContext = "; + os << ".hAdapter = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phAdapter)); os << ", "; - os << ".pfnDeleter = "; + os << ".propName = "; - os << *(params->ppfnDeleter); + os << *(params->ppropName); os << ", "; - os << ".pUserData = "; + os << ".propSize = "; + + os << *(params->ppropSize); + + os << ", "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); + + os << ", "; + os << ".pPropSizeRet = "; - ur_params::serializePtr(os, *(params->ppUserData)); + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_kernel_launch_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_kernel_launch_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_kernel_launch_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hKernel = "; - ur_params::serializePtr(os, *(params->phKernel)); + ur::details::printPtr(os, + *(params->phKernel)); os << ", "; os << ".workDim = "; @@ -11385,17 +11536,20 @@ operator<<(std::ostream &os, os << ", "; os << ".pGlobalWorkOffset = "; - ur_params::serializePtr(os, *(params->ppGlobalWorkOffset)); + ur::details::printPtr(os, + *(params->ppGlobalWorkOffset)); os << ", "; os << ".pGlobalWorkSize = "; - ur_params::serializePtr(os, *(params->ppGlobalWorkSize)); + ur::details::printPtr(os, + *(params->ppGlobalWorkSize)); os << ", "; os << ".pLocalWorkSize = "; - ur_params::serializePtr(os, *(params->ppLocalWorkSize)); + ur::details::printPtr(os, + *(params->ppLocalWorkSize)); os << ", "; os << 
".numEventsInWaitList = "; @@ -11404,32 +11558,35 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_events_wait_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_events_wait_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_events_wait_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".numEventsInWaitList = "; @@ -11438,32 +11595,35 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_events_wait_with_barrier_params_t *params) { 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_events_wait_with_barrier_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_events_wait_with_barrier_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".numEventsInWaitList = "; @@ -11472,37 +11632,41 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_buffer_read_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_mem_buffer_read_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_mem_buffer_read_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hBuffer = "; - ur_params::serializePtr(os, *(params->phBuffer)); + ur::details::printPtr(os, + *(params->phBuffer)); os << ", "; os << ".blockingRead = "; @@ -11522,7 +11686,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pDst = "; - ur_params::serializePtr(os, *(params->ppDst)); + 
ur::details::printPtr(os, + *(params->ppDst)); os << ", "; os << ".numEventsInWaitList = "; @@ -11531,37 +11696,41 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_buffer_write_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_mem_buffer_write_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_mem_buffer_write_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hBuffer = "; - ur_params::serializePtr(os, *(params->phBuffer)); + ur::details::printPtr(os, + *(params->phBuffer)); os << ", "; os << ".blockingWrite = "; @@ -11581,7 +11750,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pSrc = "; - ur_params::serializePtr(os, *(params->ppSrc)); + ur::details::printPtr(os, + *(params->ppSrc)); os << ", "; os << ".numEventsInWaitList = "; @@ -11590,37 +11760,41 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { 
os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_buffer_read_rect_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_mem_buffer_read_rect_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_mem_buffer_read_rect_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hBuffer = "; - ur_params::serializePtr(os, *(params->phBuffer)); + ur::details::printPtr(os, + *(params->phBuffer)); os << ", "; os << ".blockingRead = "; @@ -11665,7 +11839,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pDst = "; - ur_params::serializePtr(os, *(params->ppDst)); + ur::details::printPtr(os, + *(params->ppDst)); os << ", "; os << ".numEventsInWaitList = "; @@ -11674,37 +11849,41 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct 
ur_enqueue_mem_buffer_write_rect_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_mem_buffer_write_rect_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_mem_buffer_write_rect_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hBuffer = "; - ur_params::serializePtr(os, *(params->phBuffer)); + ur::details::printPtr(os, + *(params->phBuffer)); os << ", "; os << ".blockingWrite = "; @@ -11749,7 +11928,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pSrc = "; - ur_params::serializePtr(os, *(params->ppSrc)); + ur::details::printPtr(os, + *(params->ppSrc)); os << ", "; os << ".numEventsInWaitList = "; @@ -11758,42 +11938,47 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_buffer_copy_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_mem_buffer_copy_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_mem_buffer_copy_params_t *params) { os << ".hQueue = "; - 
ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hBufferSrc = "; - ur_params::serializePtr(os, *(params->phBufferSrc)); + ur::details::printPtr(os, + *(params->phBufferSrc)); os << ", "; os << ".hBufferDst = "; - ur_params::serializePtr(os, *(params->phBufferDst)); + ur::details::printPtr(os, + *(params->phBufferDst)); os << ", "; os << ".srcOffset = "; @@ -11817,42 +12002,47 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_buffer_copy_rect_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_mem_buffer_copy_rect_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_mem_buffer_copy_rect_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hBufferSrc = "; - ur_params::serializePtr(os, *(params->phBufferSrc)); + ur::details::printPtr(os, + *(params->phBufferSrc)); os << ", "; os << ".hBufferDst = "; - ur_params::serializePtr(os, *(params->phBufferDst)); + ur::details::printPtr(os, + *(params->phBufferDst)); os << ", "; os << ".srcOrigin = "; @@ -11896,42 +12086,47 @@ 
operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_buffer_fill_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_mem_buffer_fill_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_mem_buffer_fill_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hBuffer = "; - ur_params::serializePtr(os, *(params->phBuffer)); + ur::details::printPtr(os, + *(params->phBuffer)); os << ", "; os << ".pPattern = "; - ur_params::serializePtr(os, *(params->ppPattern)); + ur::details::printPtr(os, + *(params->ppPattern)); os << ", "; os << ".patternSize = "; @@ -11955,37 +12150,41 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - 
ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_image_read_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_mem_image_read_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_mem_image_read_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hImage = "; - ur_params::serializePtr(os, *(params->phImage)); + ur::details::printPtr(os, + *(params->phImage)); os << ", "; os << ".blockingRead = "; @@ -12015,7 +12214,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pDst = "; - ur_params::serializePtr(os, *(params->ppDst)); + ur::details::printPtr(os, + *(params->ppDst)); os << ", "; os << ".numEventsInWaitList = "; @@ -12024,37 +12224,41 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_image_write_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_mem_image_write_params_t type +/// @returns +/// std::ostream & 
+inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_mem_image_write_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hImage = "; - ur_params::serializePtr(os, *(params->phImage)); + ur::details::printPtr(os, + *(params->phImage)); os << ", "; os << ".blockingWrite = "; @@ -12084,7 +12288,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pSrc = "; - ur_params::serializePtr(os, *(params->ppSrc)); + ur::details::printPtr(os, + *(params->ppSrc)); os << ", "; os << ".numEventsInWaitList = "; @@ -12093,42 +12298,47 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_image_copy_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_mem_image_copy_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_mem_image_copy_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hImageSrc = "; - ur_params::serializePtr(os, *(params->phImageSrc)); + ur::details::printPtr(os, + *(params->phImageSrc)); os << ", "; os << 
".hImageDst = "; - ur_params::serializePtr(os, *(params->phImageDst)); + ur::details::printPtr(os, + *(params->phImageDst)); os << ", "; os << ".srcOrigin = "; @@ -12152,37 +12362,41 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_buffer_map_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_mem_buffer_map_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_mem_buffer_map_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hBuffer = "; - ur_params::serializePtr(os, *(params->phBuffer)); + ur::details::printPtr(os, + *(params->phBuffer)); os << ", "; os << ".blockingMap = "; @@ -12192,7 +12406,8 @@ operator<<(std::ostream &os, os << ", "; os << ".mapFlags = "; - ur_params::serializeFlag(os, *(params->pmapFlags)); + ur::details::printFlag(os, + *(params->pmapFlags)); os << ", "; os << ".offset = "; @@ -12211,47 +12426,53 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL 
&& i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); os << ", "; os << ".ppRetMap = "; - ur_params::serializePtr(os, *(params->pppRetMap)); + ur::details::printPtr(os, + *(params->pppRetMap)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_unmap_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_mem_unmap_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_mem_unmap_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hMem = "; - ur_params::serializePtr(os, *(params->phMem)); + ur::details::printPtr(os, + *(params->phMem)); os << ", "; os << ".pMappedPtr = "; - ur_params::serializePtr(os, *(params->ppMappedPtr)); + ur::details::printPtr(os, + *(params->ppMappedPtr)); os << ", "; os << ".numEventsInWaitList = "; @@ -12260,37 +12481,41 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline 
std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_usm_fill_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_usm_fill_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_usm_fill_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; - os << ".ptr = "; + os << ".pMem = "; - ur_params::serializePtr(os, *(params->pptr)); + ur::details::printPtr(os, + *(params->ppMem)); os << ", "; os << ".patternSize = "; @@ -12300,7 +12525,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pPattern = "; - ur_params::serializePtr(os, *(params->ppPattern)); + ur::details::printPtr(os, + *(params->ppPattern)); os << ", "; os << ".size = "; @@ -12314,32 +12540,35 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_usm_memcpy_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_usm_memcpy_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_usm_memcpy_params_t *params) { os << ".hQueue = "; 
- ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".blocking = "; @@ -12349,12 +12578,14 @@ operator<<(std::ostream &os, os << ", "; os << ".pDst = "; - ur_params::serializePtr(os, *(params->ppDst)); + ur::details::printPtr(os, + *(params->ppDst)); os << ", "; os << ".pSrc = "; - ur_params::serializePtr(os, *(params->ppSrc)); + ur::details::printPtr(os, + *(params->ppSrc)); os << ", "; os << ".size = "; @@ -12368,37 +12599,41 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_usm_prefetch_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_usm_prefetch_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_usm_prefetch_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".pMem = "; - ur_params::serializePtr(os, *(params->ppMem)); + ur::details::printPtr(os, + *(params->ppMem)); os << ", "; os << ".size = "; @@ -12408,7 +12643,8 @@ operator<<(std::ostream &os, os << ", "; os << ".flags = "; - ur_params::serializeFlag(os, *(params->pflags)); + ur::details::printFlag(os, + *(params->pflags)); os << 
", "; os << ".numEventsInWaitList = "; @@ -12417,37 +12653,41 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_usm_advise_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_usm_advise_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_usm_advise_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".pMem = "; - ur_params::serializePtr(os, *(params->ppMem)); + ur::details::printPtr(os, + *(params->ppMem)); os << ", "; os << ".size = "; @@ -12457,28 +12697,34 @@ operator<<(std::ostream &os, os << ", "; os << ".advice = "; - ur_params::serializeFlag(os, *(params->padvice)); + ur::details::printFlag(os, + *(params->padvice)); os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_usm_fill_2d_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_usm_fill_2d_params_t type +/// @returns +/// 
std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_usm_fill_2d_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".pMem = "; - ur_params::serializePtr(os, *(params->ppMem)); + ur::details::printPtr(os, + *(params->ppMem)); os << ", "; os << ".pitch = "; @@ -12493,7 +12739,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pPattern = "; - ur_params::serializePtr(os, *(params->ppPattern)); + ur::details::printPtr(os, + *(params->ppPattern)); os << ", "; os << ".width = "; @@ -12512,32 +12759,35 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_usm_memcpy_2d_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_usm_memcpy_2d_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_usm_memcpy_2d_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".blocking = "; @@ -12547,7 +12797,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pDst = "; - ur_params::serializePtr(os, *(params->ppDst)); + 
ur::details::printPtr(os, + *(params->ppDst)); os << ", "; os << ".dstPitch = "; @@ -12557,7 +12808,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pSrc = "; - ur_params::serializePtr(os, *(params->ppSrc)); + ur::details::printPtr(os, + *(params->ppSrc)); os << ", "; os << ".srcPitch = "; @@ -12581,42 +12833,47 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_enqueue_device_global_variable_write_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_device_global_variable_write_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_device_global_variable_write_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hProgram = "; - ur_params::serializePtr(os, *(params->phProgram)); + ur::details::printPtr(os, + *(params->phProgram)); os << ", "; os << ".name = "; - ur_params::serializePtr(os, *(params->pname)); + ur::details::printPtr(os, + *(params->pname)); os << ", "; os << ".blockingWrite = "; @@ -12636,7 +12893,8 @@ inline std::ostream &operator<<( os << ", "; os << ".pSrc = "; - ur_params::serializePtr(os, *(params->ppSrc)); + ur::details::printPtr(os, + 
*(params->ppSrc)); os << ", "; os << ".numEventsInWaitList = "; @@ -12645,42 +12903,47 @@ inline std::ostream &operator<<( os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_enqueue_device_global_variable_read_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_device_global_variable_read_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_device_global_variable_read_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hProgram = "; - ur_params::serializePtr(os, *(params->phProgram)); + ur::details::printPtr(os, + *(params->phProgram)); os << ", "; os << ".name = "; - ur_params::serializePtr(os, *(params->pname)); + ur::details::printPtr(os, + *(params->pname)); os << ", "; os << ".blockingRead = "; @@ -12700,7 +12963,8 @@ inline std::ostream &operator<<( os << ", "; os << ".pDst = "; - ur_params::serializePtr(os, *(params->ppDst)); + ur::details::printPtr(os, + *(params->ppDst)); os << ", "; os << ".numEventsInWaitList = "; @@ -12709,42 +12973,47 @@ inline std::ostream &operator<<( os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < 
*params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_read_host_pipe_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_read_host_pipe_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_read_host_pipe_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hProgram = "; - ur_params::serializePtr(os, *(params->phProgram)); + ur::details::printPtr(os, + *(params->phProgram)); os << ", "; os << ".pipe_symbol = "; - ur_params::serializePtr(os, *(params->ppipe_symbol)); + ur::details::printPtr(os, + *(params->ppipe_symbol)); os << ", "; os << ".blocking = "; @@ -12754,7 +13023,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pDst = "; - ur_params::serializePtr(os, *(params->ppDst)); + ur::details::printPtr(os, + *(params->ppDst)); os << ", "; os << ".size = "; @@ -12768,42 +13038,47 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + 
(*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_write_host_pipe_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_write_host_pipe_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_write_host_pipe_params_t *params) { os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".hProgram = "; - ur_params::serializePtr(os, *(params->phProgram)); + ur::details::printPtr(os, + *(params->phProgram)); os << ", "; os << ".pipe_symbol = "; - ur_params::serializePtr(os, *(params->ppipe_symbol)); + ur::details::printPtr(os, + *(params->ppipe_symbol)); os << ", "; os << ".blocking = "; @@ -12813,7 +13088,8 @@ operator<<(std::ostream &os, os << ", "; os << ".pSrc = "; - ur_params::serializePtr(os, *(params->ppSrc)); + ur::details::printPtr(os, + *(params->ppSrc)); os << ", "; os << ".size = "; @@ -12827,222 +13103,101 @@ operator<<(std::ostream &os, os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && - i < *params->pnumEventsInWaitList; - ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct 
ur_event_get_info_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_cooperative_kernel_launch_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *params) { - os << ".hEvent = "; + os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phEvent)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; - os << ".propName = "; + os << ".hKernel = "; - os << *(params->ppropName); + ur::details::printPtr(os, + *(params->phKernel)); os << ", "; - os << ".propSize = "; - - os << *(params->ppropSize); + os << ".workDim = "; - os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + os << *(params->pworkDim); os << ", "; - os << ".pPropSizeRet = "; - - ur_params::serializePtr(os, *(params->ppPropSizeRet)); - - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_event_get_profiling_info_params_t *params) { - - os << ".hEvent = "; + os << ".pGlobalWorkOffset = "; - ur_params::serializePtr(os, *(params->phEvent)); + ur::details::printPtr(os, + *(params->ppGlobalWorkOffset)); os << ", "; - os << ".propName = "; + os << ".pGlobalWorkSize = "; - os << *(params->ppropName); + ur::details::printPtr(os, + *(params->ppGlobalWorkSize)); os << ", "; - os << ".propSize = "; - - os << *(params->ppropSize); + os << ".pLocalWorkSize = "; - os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + ur::details::printPtr(os, + *(params->ppLocalWorkSize)); os << ", "; - os << ".pPropSizeRet = "; - - ur_params::serializePtr(os, *(params->ppPropSizeRet)); - - return os; -} - -inline std::ostream &operator<<(std::ostream &os, - const struct 
ur_event_wait_params_t *params) { - - os << ".numEvents = "; + os << ".numEventsInWaitList = "; - os << *(params->pnumEvents); + os << *(params->pnumEventsInWaitList); os << ", "; os << ".phEventWaitList = {"; - for (size_t i = 0; - *(params->pphEventWaitList) != NULL && i < *params->pnumEvents; ++i) { + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; - return os; -} - -inline std::ostream &operator<<(std::ostream &os, - const struct ur_event_retain_params_t *params) { - - os << ".hEvent = "; - - ur_params::serializePtr(os, *(params->phEvent)); - - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, const struct ur_event_release_params_t *params) { - - os << ".hEvent = "; - - ur_params::serializePtr(os, *(params->phEvent)); - - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_event_get_native_handle_params_t *params) { - - os << ".hEvent = "; - - ur_params::serializePtr(os, *(params->phEvent)); - - os << ", "; - os << ".phNativeEvent = "; - - ur_params::serializePtr(os, *(params->pphNativeEvent)); - - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_event_create_with_native_handle_params_t *params) { - - os << ".hNativeEvent = "; - - ur_params::serializePtr(os, *(params->phNativeEvent)); - - os << ", "; - os << ".hContext = "; - - ur_params::serializePtr(os, *(params->phContext)); - - os << ", "; - os << ".pProperties = "; - - ur_params::serializePtr(os, *(params->ppProperties)); - os << ", "; os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphEvent)); - - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_event_set_callback_params_t *params) { - - os << ".hEvent = "; - - ur_params::serializePtr(os, 
*(params->phEvent)); - - os << ", "; - os << ".execStatus = "; - - os << *(params->pexecStatus); - - os << ", "; - os << ".pfnNotify = "; - - os << *(params->ppfnNotify); - - os << ", "; - os << ".pUserData = "; - - ur_params::serializePtr(os, *(params->ppUserData)); - - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, const struct ur_kernel_create_params_t *params) { - - os << ".hProgram = "; - - ur_params::serializePtr(os, *(params->phProgram)); - - os << ", "; - os << ".pKernelName = "; - - ur_params::serializePtr(os, *(params->ppKernelName)); - - os << ", "; - os << ".phKernel = "; - - ur_params::serializePtr(os, *(params->pphKernel)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_kernel_get_info_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_queue_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_queue_get_info_params_t *params) { - os << ".hKernel = "; + os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phKernel)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; os << ".propName = "; @@ -13056,1945 +13211,2084 @@ operator<<(std::ostream &os, const struct ur_kernel_get_info_params_t *params) { os << ", "; os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); os << ", "; os << ".pPropSizeRet = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_get_group_info_params_t *params) { 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_queue_create_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_queue_create_params_t *params) { - os << ".hKernel = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phKernel)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".propName = "; - - os << *(params->ppropName); - - os << ", "; - os << ".propSize = "; - - os << *(params->ppropSize); + os << ".pProperties = "; - os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; - os << ".pPropSizeRet = "; + os << ".phQueue = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->pphQueue)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_get_sub_group_info_params_t *params) { - - os << ".hKernel = "; - - ur_params::serializePtr(os, *(params->phKernel)); - - os << ", "; - os << ".hDevice = "; - - ur_params::serializePtr(os, *(params->phDevice)); - - os << ", "; - os << ".propName = "; - - os << *(params->ppropName); - - os << ", "; - os << ".propSize = "; - - os << *(params->ppropSize); - - os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_queue_retain_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct 
ur_queue_retain_params_t *params) { - os << ", "; - os << ".pPropSizeRet = "; + os << ".hQueue = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->phQueue)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_kernel_retain_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_queue_release_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_queue_release_params_t *params) { - os << ".hKernel = "; + os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phKernel)); + ur::details::printPtr(os, + *(params->phQueue)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_kernel_release_params_t *params) { - - os << ".hKernel = "; - - ur_params::serializePtr(os, *(params->phKernel)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_queue_get_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_queue_get_native_handle_params_t *params) { - return os; -} + os << ".hQueue = "; -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_get_native_handle_params_t *params) { + ur::details::printPtr(os, + *(params->phQueue)); - os << ".hKernel = "; + os << ", "; + os << ".pDesc = "; - ur_params::serializePtr(os, *(params->phKernel)); + ur::details::printPtr(os, + *(params->ppDesc)); os << ", "; - os << ".phNativeKernel = "; + os << ".phNativeQueue = "; - ur_params::serializePtr(os, *(params->pphNativeKernel)); + ur::details::printPtr(os, + *(params->pphNativeQueue)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_create_with_native_handle_params_t *params) { 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_queue_create_with_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_queue_create_with_native_handle_params_t *params) { - os << ".hNativeKernel = "; + os << ".hNativeQueue = "; - ur_params::serializePtr(os, *(params->phNativeKernel)); + ur::details::printPtr(os, + *(params->phNativeQueue)); os << ", "; os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".hProgram = "; + os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phProgram)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; os << ".pProperties = "; - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; - os << ".phKernel = "; + os << ".phQueue = "; - ur_params::serializePtr(os, *(params->pphKernel)); + ur::details::printPtr(os, + *(params->pphQueue)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_set_arg_value_params_t *params) { - - os << ".hKernel = "; - - ur_params::serializePtr(os, *(params->phKernel)); - - os << ", "; - os << ".argIndex = "; - - os << *(params->pargIndex); - - os << ", "; - os << ".argSize = "; - - os << *(params->pargSize); - - os << ", "; - os << ".pProperties = "; - - ur_params::serializePtr(os, *(params->ppProperties)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_queue_finish_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_queue_finish_params_t *params) { - os << ", "; - os << ".pArgValue = "; + os << ".hQueue = "; - ur_params::serializePtr(os, *(params->ppArgValue)); + 
ur::details::printPtr(os, + *(params->phQueue)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_set_arg_local_params_t *params) { - - os << ".hKernel = "; - - ur_params::serializePtr(os, *(params->phKernel)); - - os << ", "; - os << ".argIndex = "; - - os << *(params->pargIndex); - - os << ", "; - os << ".argSize = "; - - os << *(params->pargSize); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_queue_flush_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_queue_flush_params_t *params) { - os << ", "; - os << ".pProperties = "; + os << ".hQueue = "; - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + *(params->phQueue)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_set_arg_pointer_params_t *params) { - - os << ".hKernel = "; - - ur_params::serializePtr(os, *(params->phKernel)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_unsampled_image_handle_destroy_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *params) { - os << ", "; - os << ".argIndex = "; + os << ".hContext = "; - os << *(params->pargIndex); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".pProperties = "; + os << ".hDevice = "; - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".pArgValue = "; + os << ".hImage = "; - ur_params::serializePtr(os, *(params->ppArgValue)); + ur::details::printPtr(os, + *(params->phImage)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct 
ur_kernel_set_exec_info_params_t *params) { - - os << ".hKernel = "; - - ur_params::serializePtr(os, *(params->phKernel)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_sampled_image_handle_destroy_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_sampled_image_handle_destroy_exp_params_t *params) { - os << ", "; - os << ".propName = "; + os << ".hContext = "; - os << *(params->ppropName); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".propSize = "; + os << ".hDevice = "; - os << *(params->ppropSize); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".pProperties = "; - - ur_params::serializePtr(os, *(params->ppProperties)); + os << ".hImage = "; - os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + ur::details::printPtr(os, + *(params->phImage)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_set_arg_sampler_params_t *params) { - - os << ".hKernel = "; - - ur_params::serializePtr(os, *(params->phKernel)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_image_allocate_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_image_allocate_exp_params_t *params) { - os << ", "; - os << ".argIndex = "; - - os << *(params->pargIndex); - - os << ", "; - os << ".pProperties = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".hArgValue = "; - - ur_params::serializePtr(os, *(params->phArgValue)); - - return os; -} - 
-inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_set_arg_mem_obj_params_t *params) { - - os << ".hKernel = "; + os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phKernel)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".argIndex = "; + os << ".pImageFormat = "; - os << *(params->pargIndex); + ur::details::printPtr(os, + *(params->ppImageFormat)); os << ", "; - os << ".pProperties = "; + os << ".pImageDesc = "; - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + *(params->ppImageDesc)); os << ", "; - os << ".hArgValue = "; + os << ".phImageMem = "; - ur_params::serializePtr(os, *(params->phArgValue)); + ur::details::printPtr(os, + *(params->pphImageMem)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_kernel_set_specialization_constants_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_image_free_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_image_free_exp_params_t *params) { - os << ".hKernel = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phKernel)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".count = "; + os << ".hDevice = "; - os << *(params->pcount); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".pSpecConstants = "; + os << ".hImageMem = "; - ur_params::serializePtr(os, *(params->ppSpecConstants)); + ur::details::printPtr(os, + *(params->phImageMem)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_loader_config_create_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the 
ur_bindless_images_unsampled_image_create_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_unsampled_image_create_exp_params_t *params) { - os << ".phLoaderConfig = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->pphLoaderConfig)); + ur::details::printPtr(os, + *(params->phContext)); - return os; -} + os << ", "; + os << ".hDevice = "; -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_loader_config_retain_params_t *params) { + ur::details::printPtr(os, + *(params->phDevice)); - os << ".hLoaderConfig = "; + os << ", "; + os << ".hImageMem = "; - ur_params::serializePtr(os, *(params->phLoaderConfig)); + ur::details::printPtr(os, + *(params->phImageMem)); - return os; -} + os << ", "; + os << ".pImageFormat = "; -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_loader_config_release_params_t *params) { + ur::details::printPtr(os, + *(params->ppImageFormat)); - os << ".hLoaderConfig = "; + os << ", "; + os << ".pImageDesc = "; - ur_params::serializePtr(os, *(params->phLoaderConfig)); + ur::details::printPtr(os, + *(params->ppImageDesc)); + + os << ", "; + os << ".phMem = "; + + ur::details::printPtr(os, + *(params->pphMem)); + + os << ", "; + os << ".phImage = "; + + ur::details::printPtr(os, + *(params->pphImage)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_loader_config_get_info_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_sampled_image_create_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_sampled_image_create_exp_params_t *params) { - os << ".hLoaderConfig = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phLoaderConfig)); + 
ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".propName = "; + os << ".hDevice = "; - os << *(params->ppropName); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".propSize = "; + os << ".hImageMem = "; - os << *(params->ppropSize); + ur::details::printPtr(os, + *(params->phImageMem)); os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + os << ".pImageFormat = "; + + ur::details::printPtr(os, + *(params->ppImageFormat)); os << ", "; - os << ".pPropSizeRet = "; + os << ".pImageDesc = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->ppImageDesc)); - return os; -} + os << ", "; + os << ".hSampler = "; -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_loader_config_enable_layer_params_t *params) { + ur::details::printPtr(os, + *(params->phSampler)); - os << ".hLoaderConfig = "; + os << ", "; + os << ".phMem = "; - ur_params::serializePtr(os, *(params->phLoaderConfig)); + ur::details::printPtr(os, + *(params->pphMem)); os << ", "; - os << ".pLayerName = "; + os << ".phImage = "; - ur_params::serializePtr(os, *(params->ppLayerName)); + ur::details::printPtr(os, + *(params->pphImage)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_mem_image_create_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_image_copy_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_image_copy_exp_params_t *params) { - os << ".hContext = "; + os << ".hQueue = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; - os << ".flags = "; + os << ".pDst = "; + + ur::details::printPtr(os, + 
*(params->ppDst)); + + os << ", "; + os << ".pSrc = "; - ur_params::serializeFlag(os, *(params->pflags)); + ur::details::printPtr(os, + *(params->ppSrc)); os << ", "; os << ".pImageFormat = "; - ur_params::serializePtr(os, *(params->ppImageFormat)); + ur::details::printPtr(os, + *(params->ppImageFormat)); os << ", "; os << ".pImageDesc = "; - ur_params::serializePtr(os, *(params->ppImageDesc)); + ur::details::printPtr(os, + *(params->ppImageDesc)); os << ", "; - os << ".pHost = "; + os << ".imageCopyFlags = "; - ur_params::serializePtr(os, *(params->ppHost)); + ur::details::printFlag(os, + *(params->pimageCopyFlags)); os << ", "; - os << ".phMem = "; + os << ".srcOffset = "; - ur_params::serializePtr(os, *(params->pphMem)); + os << *(params->psrcOffset); - return os; -} + os << ", "; + os << ".dstOffset = "; -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_mem_buffer_create_params_t *params) { + os << *(params->pdstOffset); - os << ".hContext = "; + os << ", "; + os << ".copyExtent = "; - ur_params::serializePtr(os, *(params->phContext)); + os << *(params->pcopyExtent); os << ", "; - os << ".flags = "; + os << ".hostExtent = "; - ur_params::serializeFlag(os, *(params->pflags)); + os << *(params->phostExtent); os << ", "; - os << ".size = "; + os << ".numEventsInWaitList = "; - os << *(params->psize); + os << *(params->pnumEventsInWaitList); os << ", "; - os << ".pProperties = "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; os << ", "; - os << ".phBuffer = "; + os << ".phEvent = "; - ur_params::serializePtr(os, *(params->pphBuffer)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_mem_retain_params_t 
*params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_image_get_info_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_image_get_info_exp_params_t *params) { - os << ".hMem = "; + os << ".hImageMem = "; - ur_params::serializePtr(os, *(params->phMem)); + ur::details::printPtr(os, + *(params->phImageMem)); - return os; -} + os << ", "; + os << ".propName = "; -inline std::ostream &operator<<(std::ostream &os, - const struct ur_mem_release_params_t *params) { + os << *(params->ppropName); - os << ".hMem = "; + os << ", "; + os << ".pPropValue = "; + + ur::details::printPtr(os, + *(params->ppPropValue)); + + os << ", "; + os << ".pPropSizeRet = "; - ur_params::serializePtr(os, *(params->phMem)); + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_mem_buffer_partition_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_mipmap_get_level_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_mipmap_get_level_exp_params_t *params) { - os << ".hBuffer = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phBuffer)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".flags = "; + os << ".hDevice = "; - ur_params::serializeFlag(os, *(params->pflags)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".bufferCreateType = "; + os << ".hImageMem = "; - os << *(params->pbufferCreateType); + ur::details::printPtr(os, + *(params->phImageMem)); os << ", "; - os << ".pRegion = "; + os << ".mipmapLevel = "; - ur_params::serializePtr(os, 
*(params->ppRegion)); + os << *(params->pmipmapLevel); os << ", "; - os << ".phMem = "; + os << ".phImageMem = "; - ur_params::serializePtr(os, *(params->pphMem)); + ur::details::printPtr(os, + *(params->pphImageMem)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_mem_get_native_handle_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_mipmap_free_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_mipmap_free_exp_params_t *params) { - os << ".hMem = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phMem)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".phNativeMem = "; + os << ".hDevice = "; + + ur::details::printPtr(os, + *(params->phDevice)); + + os << ", "; + os << ".hMem = "; - ur_params::serializePtr(os, *(params->pphNativeMem)); + ur::details::printPtr(os, + *(params->phMem)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_mem_buffer_create_with_native_handle_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_import_opaque_fd_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_import_opaque_fd_exp_params_t *params) { - os << ".hNativeMem = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phNativeMem)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".hContext = "; + os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".pProperties = "; + os << ".size = "; - ur_params::serializePtr(os, 
*(params->ppProperties)); + os << *(params->psize); os << ", "; - os << ".phMem = "; + os << ".pInteropMemDesc = "; - ur_params::serializePtr(os, *(params->pphMem)); + ur::details::printPtr(os, + *(params->ppInteropMemDesc)); + + os << ", "; + os << ".phInteropMem = "; + + ur::details::printPtr(os, + *(params->pphInteropMem)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_mem_image_create_with_native_handle_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_map_external_array_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_map_external_array_exp_params_t *params) { - os << ".hNativeMem = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phNativeMem)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".hContext = "; + os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; os << ".pImageFormat = "; - ur_params::serializePtr(os, *(params->ppImageFormat)); + ur::details::printPtr(os, + *(params->ppImageFormat)); os << ", "; os << ".pImageDesc = "; - ur_params::serializePtr(os, *(params->ppImageDesc)); + ur::details::printPtr(os, + *(params->ppImageDesc)); os << ", "; - os << ".pProperties = "; + os << ".hInteropMem = "; - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + *(params->phInteropMem)); os << ", "; - os << ".phMem = "; + os << ".phImageMem = "; - ur_params::serializePtr(os, *(params->pphMem)); + ur::details::printPtr(os, + *(params->pphImageMem)); return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_mem_get_info_params_t *params) { - - os << ".hMemory = "; - - ur_params::serializePtr(os, *(params->phMemory)); 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_release_interop_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_release_interop_exp_params_t *params) { - os << ", "; - os << ".propName = "; + os << ".hContext = "; - os << *(params->ppropName); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".propSize = "; - - os << *(params->ppropSize); + os << ".hDevice = "; - os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".pPropSizeRet = "; + os << ".hInteropMem = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->phInteropMem)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_mem_image_get_info_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_import_external_semaphore_opaque_fd_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_import_external_semaphore_opaque_fd_exp_params_t *params) { - os << ".hMemory = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phMemory)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".propName = "; + os << ".hDevice = "; - os << *(params->ppropName); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".propSize = "; - - os << *(params->ppropSize); + os << ".pInteropSemaphoreDesc = "; - os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - 
*(params->ppropSize)); + ur::details::printPtr(os, + *(params->ppInteropSemaphoreDesc)); os << ", "; - os << ".pPropSizeRet = "; + os << ".phInteropSemaphore = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->pphInteropSemaphore)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_physical_mem_create_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_destroy_external_semaphore_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_destroy_external_semaphore_exp_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".size = "; + os << ".hInteropSemaphore = "; - os << *(params->psize); + ur::details::printPtr(os, + *(params->phInteropSemaphore)); - os << ", "; - os << ".pProperties = "; + return os; +} - ur_params::serializePtr(os, *(params->ppProperties)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_wait_external_semaphore_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_wait_external_semaphore_exp_params_t *params) { - os << ", "; - os << ".phPhysicalMem = "; + os << ".hQueue = "; - ur_params::serializePtr(os, *(params->pphPhysicalMem)); + ur::details::printPtr(os, + *(params->phQueue)); - return os; -} + os << ", "; + os << ".hSemaphore = "; -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_physical_mem_retain_params_t 
*params) { + ur::details::printPtr(os, + *(params->phSemaphore)); - os << ".hPhysicalMem = "; + os << ", "; + os << ".numEventsInWaitList = "; - ur_params::serializePtr(os, *(params->phPhysicalMem)); + os << *(params->pnumEventsInWaitList); - return os; -} + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_physical_mem_release_params_t *params) { + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; - os << ".hPhysicalMem = "; + os << ", "; + os << ".phEvent = "; - ur_params::serializePtr(os, *(params->phPhysicalMem)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_platform_get_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_signal_external_semaphore_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_signal_external_semaphore_exp_params_t *params) { - os << ".phAdapters = {"; - for (size_t i = 0; - *(params->pphAdapters) != NULL && i < *params->pNumAdapters; ++i) { - if (i != 0) { - os << ", "; - } + os << ".hQueue = "; - ur_params::serializePtr(os, (*(params->pphAdapters))[i]); - } - os << "}"; + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; - os << ".NumAdapters = "; + os << ".hSemaphore = "; - os << *(params->pNumAdapters); + ur::details::printPtr(os, + *(params->phSemaphore)); os << ", "; - os << ".NumEntries = "; + os << ".numEventsInWaitList = "; - os << *(params->pNumEntries); + os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phPlatforms = {"; - for (size_t i = 0; - *(params->pphPlatforms) != NULL && i < 
*params->pNumEntries; ++i) { + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphPlatforms))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } os << "}"; os << ", "; - os << ".pNumPlatforms = "; + os << ".phEvent = "; - ur_params::serializePtr(os, *(params->ppNumPlatforms)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_platform_get_info_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_host_alloc_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_host_alloc_params_t *params) { - os << ".hPlatform = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phPlatform)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".propName = "; + os << ".pUSMDesc = "; - os << *(params->ppropName); + ur::details::printPtr(os, + *(params->ppUSMDesc)); os << ", "; - os << ".propSize = "; - - os << *(params->ppropSize); + os << ".pool = "; - os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + ur::details::printPtr(os, + *(params->ppool)); os << ", "; - os << ".pPropSizeRet = "; - - ur_params::serializePtr(os, *(params->ppPropSizeRet)); - - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_platform_get_native_handle_params_t *params) { - - os << ".hPlatform = "; + os << ".size = "; - ur_params::serializePtr(os, *(params->phPlatform)); + os << *(params->psize); os << ", "; - os << ".phNativePlatform = "; + os << ".ppMem = "; - ur_params::serializePtr(os, *(params->pphNativePlatform)); + 
ur::details::printPtr(os, + *(params->pppMem)); return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_platform_create_with_native_handle_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_device_alloc_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_device_alloc_params_t *params) { - os << ".hNativePlatform = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->phNativePlatform)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".pProperties = "; + os << ".hDevice = "; - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".phPlatform = "; + os << ".pUSMDesc = "; - ur_params::serializePtr(os, *(params->pphPlatform)); + ur::details::printPtr(os, + *(params->ppUSMDesc)); - return os; -} + os << ", "; + os << ".pool = "; -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_platform_get_api_version_params_t *params) { + ur::details::printPtr(os, + *(params->ppool)); - os << ".hPlatform = "; + os << ", "; + os << ".size = "; - ur_params::serializePtr(os, *(params->phPlatform)); + os << *(params->psize); os << ", "; - os << ".pVersion = "; + os << ".ppMem = "; - ur_params::serializePtr(os, *(params->ppVersion)); + ur::details::printPtr(os, + *(params->pppMem)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_platform_get_backend_option_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_shared_alloc_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_shared_alloc_params_t *params) { - os << ".hPlatform = "; + os << ".hContext = 
"; - ur_params::serializePtr(os, *(params->phPlatform)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".pFrontendOption = "; + os << ".hDevice = "; - ur_params::serializePtr(os, *(params->ppFrontendOption)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".ppPlatformOption = "; + os << ".pUSMDesc = "; - ur_params::serializePtr(os, *(params->pppPlatformOption)); + ur::details::printPtr(os, + *(params->ppUSMDesc)); - return os; -} + os << ", "; + os << ".pool = "; -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_create_with_il_params_t *params) { + ur::details::printPtr(os, + *(params->ppool)); - os << ".hContext = "; + os << ", "; + os << ".size = "; - ur_params::serializePtr(os, *(params->phContext)); + os << *(params->psize); os << ", "; - os << ".pIL = "; + os << ".ppMem = "; - ur_params::serializePtr(os, *(params->ppIL)); + ur::details::printPtr(os, + *(params->pppMem)); - os << ", "; - os << ".length = "; + return os; +} - os << *(params->plength); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_free_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_free_params_t *params) { - os << ", "; - os << ".pProperties = "; + os << ".hContext = "; - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".phProgram = "; + os << ".pMem = "; - ur_params::serializePtr(os, *(params->pphProgram)); + ur::details::printPtr(os, + *(params->ppMem)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_create_with_binary_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_get_mem_alloc_info_params_t type +/// @returns +/// std::ostream & 
+inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_get_mem_alloc_info_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".pMem = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->ppMem)); os << ", "; - os << ".size = "; + os << ".propName = "; - os << *(params->psize); + os << *(params->ppropName); os << ", "; - os << ".pBinary = "; + os << ".propSize = "; - ur_params::serializePtr(os, *(params->ppBinary)); + os << *(params->ppropSize); os << ", "; - os << ".pProperties = "; - - ur_params::serializePtr(os, *(params->ppProperties)); + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); os << ", "; - os << ".phProgram = "; + os << ".pPropSizeRet = "; - ur_params::serializePtr(os, *(params->pphProgram)); + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_program_build_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_create_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_create_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".hProgram = "; + os << ".pPoolDesc = "; - ur_params::serializePtr(os, *(params->phProgram)); + ur::details::printPtr(os, + *(params->ppPoolDesc)); os << ", "; - os << ".pOptions = "; + os << ".ppPool = "; - ur_params::serializePtr(os, *(params->ppOptions)); + ur::details::printPtr(os, + *(params->pppPool)); return os; } -inline std::ostream & 
-operator<<(std::ostream &os, - const struct ur_program_build_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_retain_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_retain_params_t *params) { - os << ".hProgram = "; + os << ".pPool = "; + + ur::details::printPtr(os, + *(params->ppPool)); - ur_params::serializePtr(os, *(params->phProgram)); + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_release_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_release_params_t *params) { + + os << ".pPool = "; + + ur::details::printPtr(os, + *(params->ppPool)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_get_info_params_t *params) { + + os << ".hPool = "; + + ur::details::printPtr(os, + *(params->phPool)); os << ", "; - os << ".numDevices = "; + os << ".propName = "; - os << *(params->pnumDevices); + os << *(params->ppropName); os << ", "; - os << ".phDevices = {"; - for (size_t i = 0; - *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { - if (i != 0) { - os << ", "; - } + os << ".propSize = "; - ur_params::serializePtr(os, (*(params->pphDevices))[i]); - } - os << "}"; + os << *(params->ppropSize); os << ", "; - os << ".pOptions = "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); + + os << ", "; + os << ".pPropSizeRet = "; - ur_params::serializePtr(os, 
*(params->ppOptions)); + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_program_compile_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pitched_alloc_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pitched_alloc_exp_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".hProgram = "; + os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phProgram)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".pOptions = "; + os << ".pUSMDesc = "; - ur_params::serializePtr(os, *(params->ppOptions)); + ur::details::printPtr(os, + *(params->ppUSMDesc)); - return os; -} + os << ", "; + os << ".pool = "; -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_compile_exp_params_t *params) { + ur::details::printPtr(os, + *(params->ppool)); - os << ".hProgram = "; + os << ", "; + os << ".widthInBytes = "; - ur_params::serializePtr(os, *(params->phProgram)); + os << *(params->pwidthInBytes); os << ", "; - os << ".numDevices = "; + os << ".height = "; - os << *(params->pnumDevices); + os << *(params->pheight); os << ", "; - os << ".phDevices = {"; - for (size_t i = 0; - *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { - if (i != 0) { - os << ", "; - } + os << ".elementSizeBytes = "; - ur_params::serializePtr(os, (*(params->pphDevices))[i]); - } - os << "}"; + os << *(params->pelementSizeBytes); os << ", "; - os << ".pOptions = "; + os << ".ppMem = "; - ur_params::serializePtr(os, *(params->ppOptions)); + ur::details::printPtr(os, + *(params->pppMem)); + + os << ", "; + os << ".pResultPitch = "; + + ur::details::printPtr(os, + 
*(params->ppResultPitch)); return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_program_link_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_import_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_import_exp_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".count = "; + os << ".pMem = "; - os << *(params->pcount); + ur::details::printPtr(os, + *(params->ppMem)); os << ", "; - os << ".phPrograms = {"; - for (size_t i = 0; *(params->pphPrograms) != NULL && i < *params->pcount; - ++i) { - if (i != 0) { - os << ", "; - } + os << ".size = "; - ur_params::serializePtr(os, (*(params->pphPrograms))[i]); - } - os << "}"; + os << *(params->psize); - os << ", "; - os << ".pOptions = "; + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_release_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_release_exp_params_t *params) { - ur_params::serializePtr(os, *(params->ppOptions)); + os << ".hContext = "; + + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".phProgram = "; + os << ".pMem = "; - ur_params::serializePtr(os, *(params->pphProgram)); + ur::details::printPtr(os, + *(params->ppMem)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_link_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_create_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, 
[[maybe_unused]] const struct ur_command_buffer_create_exp_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".numDevices = "; - - os << *(params->pnumDevices); - - os << ", "; - os << ".phDevices = {"; - for (size_t i = 0; - *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { - if (i != 0) { - os << ", "; - } + os << ".hDevice = "; - ur_params::serializePtr(os, (*(params->pphDevices))[i]); - } - os << "}"; + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".count = "; + os << ".pCommandBufferDesc = "; - os << *(params->pcount); + ur::details::printPtr(os, + *(params->ppCommandBufferDesc)); os << ", "; - os << ".phPrograms = {"; - for (size_t i = 0; *(params->pphPrograms) != NULL && i < *params->pcount; - ++i) { - if (i != 0) { - os << ", "; - } + os << ".phCommandBuffer = "; - ur_params::serializePtr(os, (*(params->pphPrograms))[i]); - } - os << "}"; + ur::details::printPtr(os, + *(params->pphCommandBuffer)); - os << ", "; - os << ".pOptions = "; + return os; +} - ur_params::serializePtr(os, *(params->ppOptions)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_retain_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_retain_exp_params_t *params) { - os << ", "; - os << ".phProgram = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->pphProgram)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_program_retain_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_release_exp_params_t type +/// @returns +/// std::ostream 
& +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_release_exp_params_t *params) { - os << ".hProgram = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->phProgram)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_program_release_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_finalize_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_finalize_exp_params_t *params) { - os << ".hProgram = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->phProgram)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_get_function_pointer_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_append_kernel_launch_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_append_kernel_launch_exp_params_t *params) { - os << ".hDevice = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); os << ", "; - os << ".hProgram = "; + os << ".hKernel = "; - ur_params::serializePtr(os, *(params->phProgram)); + ur::details::printPtr(os, + *(params->phKernel)); os << ", "; - os << ".pFunctionName = "; + os << ".workDim = "; - ur_params::serializePtr(os, *(params->ppFunctionName)); + os << *(params->pworkDim); os << ", "; - os << ".ppFunctionPointer = "; - - ur_params::serializePtr(os, *(params->pppFunctionPointer)); - - return os; -} 
+ os << ".pGlobalWorkOffset = "; -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_get_info_params_t *params) { + ur::details::printPtr(os, + *(params->ppGlobalWorkOffset)); - os << ".hProgram = "; + os << ", "; + os << ".pGlobalWorkSize = "; - ur_params::serializePtr(os, *(params->phProgram)); + ur::details::printPtr(os, + *(params->ppGlobalWorkSize)); os << ", "; - os << ".propName = "; + os << ".pLocalWorkSize = "; - os << *(params->ppropName); + ur::details::printPtr(os, + *(params->ppLocalWorkSize)); os << ", "; - os << ".propSize = "; + os << ".numSyncPointsInWaitList = "; - os << *(params->ppropSize); + os << *(params->pnumSyncPointsInWaitList); os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + os << ".pSyncPointWaitList = "; + + ur::details::printPtr(os, + *(params->ppSyncPointWaitList)); os << ", "; - os << ".pPropSizeRet = "; + os << ".pSyncPoint = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->ppSyncPoint)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_get_build_info_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_append_usm_memcpy_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_append_usm_memcpy_exp_params_t *params) { - os << ".hProgram = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->phProgram)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); os << ", "; - os << ".hDevice = "; + os << ".pDst = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->ppDst)); os << ", "; - os << ".propName = "; + os << ".pSrc = "; - os << *(params->ppropName); + 
ur::details::printPtr(os, + *(params->ppSrc)); os << ", "; - os << ".propSize = "; - - os << *(params->ppropSize); + os << ".size = "; - os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + os << *(params->psize); os << ", "; - os << ".pPropSizeRet = "; - - ur_params::serializePtr(os, *(params->ppPropSizeRet)); - - return os; -} - -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_program_set_specialization_constants_params_t *params) { - - os << ".hProgram = "; + os << ".numSyncPointsInWaitList = "; - ur_params::serializePtr(os, *(params->phProgram)); + os << *(params->pnumSyncPointsInWaitList); os << ", "; - os << ".count = "; + os << ".pSyncPointWaitList = "; - os << *(params->pcount); + ur::details::printPtr(os, + *(params->ppSyncPointWaitList)); os << ", "; - os << ".pSpecConstants = {"; - for (size_t i = 0; - *(params->ppSpecConstants) != NULL && i < *params->pcount; ++i) { - if (i != 0) { - os << ", "; - } + os << ".pSyncPoint = "; - os << (*(params->ppSpecConstants))[i]; - } - os << "}"; + ur::details::printPtr(os, + *(params->ppSyncPoint)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_get_native_handle_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_append_usm_fill_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_append_usm_fill_exp_params_t *params) { - os << ".hProgram = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->phProgram)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); os << ", "; - os << ".phNativeProgram = "; + os << ".pMemory = "; - ur_params::serializePtr(os, *(params->pphNativeProgram)); + ur::details::printPtr(os, + *(params->ppMemory)); - 
return os; -} + os << ", "; + os << ".pPattern = "; -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_create_with_native_handle_params_t *params) { + ur::details::printPtr(os, + *(params->ppPattern)); - os << ".hNativeProgram = "; + os << ", "; + os << ".patternSize = "; + + os << *(params->ppatternSize); + + os << ", "; + os << ".size = "; - ur_params::serializePtr(os, *(params->phNativeProgram)); + os << *(params->psize); os << ", "; - os << ".hContext = "; + os << ".numSyncPointsInWaitList = "; - ur_params::serializePtr(os, *(params->phContext)); + os << *(params->pnumSyncPointsInWaitList); os << ", "; - os << ".pProperties = "; + os << ".pSyncPointWaitList = "; - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + *(params->ppSyncPointWaitList)); os << ", "; - os << ".phProgram = "; + os << ".pSyncPoint = "; - ur_params::serializePtr(os, *(params->pphProgram)); + ur::details::printPtr(os, + *(params->ppSyncPoint)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_queue_get_info_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_append_mem_buffer_copy_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_append_mem_buffer_copy_exp_params_t *params) { - os << ".hQueue = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); os << ", "; - os << ".propName = "; + os << ".hSrcMem = "; - os << *(params->ppropName); + ur::details::printPtr(os, + *(params->phSrcMem)); os << ", "; - os << ".propSize = "; - - os << *(params->ppropSize); + os << ".hDstMem = "; - os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - 
*(params->ppropSize)); + ur::details::printPtr(os, + *(params->phDstMem)); os << ", "; - os << ".pPropSizeRet = "; + os << ".srcOffset = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + os << *(params->psrcOffset); - return os; -} + os << ", "; + os << ".dstOffset = "; -inline std::ostream &operator<<(std::ostream &os, - const struct ur_queue_create_params_t *params) { + os << *(params->pdstOffset); - os << ".hContext = "; + os << ", "; + os << ".size = "; - ur_params::serializePtr(os, *(params->phContext)); + os << *(params->psize); os << ", "; - os << ".hDevice = "; + os << ".numSyncPointsInWaitList = "; - ur_params::serializePtr(os, *(params->phDevice)); + os << *(params->pnumSyncPointsInWaitList); os << ", "; - os << ".pProperties = "; + os << ".pSyncPointWaitList = "; - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + *(params->ppSyncPointWaitList)); os << ", "; - os << ".phQueue = "; + os << ".pSyncPoint = "; - ur_params::serializePtr(os, *(params->pphQueue)); + ur::details::printPtr(os, + *(params->ppSyncPoint)); return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_queue_retain_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_append_mem_buffer_write_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_append_mem_buffer_write_exp_params_t *params) { - os << ".hQueue = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); - return os; -} + os << ", "; + os << ".hBuffer = "; -inline std::ostream & -operator<<(std::ostream &os, const struct ur_queue_release_params_t *params) { + ur::details::printPtr(os, + *(params->phBuffer)); - os << ".hQueue = "; + os << ", "; + os << ".offset = "; + + os << 
*(params->poffset); - ur_params::serializePtr(os, *(params->phQueue)); + os << ", "; + os << ".size = "; - return os; -} + os << *(params->psize); + + os << ", "; + os << ".pSrc = "; -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_queue_get_native_handle_params_t *params) { + ur::details::printPtr(os, + *(params->ppSrc)); - os << ".hQueue = "; + os << ", "; + os << ".numSyncPointsInWaitList = "; - ur_params::serializePtr(os, *(params->phQueue)); + os << *(params->pnumSyncPointsInWaitList); os << ", "; - os << ".pDesc = "; + os << ".pSyncPointWaitList = "; - ur_params::serializePtr(os, *(params->ppDesc)); + ur::details::printPtr(os, + *(params->ppSyncPointWaitList)); os << ", "; - os << ".phNativeQueue = "; + os << ".pSyncPoint = "; - ur_params::serializePtr(os, *(params->pphNativeQueue)); + ur::details::printPtr(os, + *(params->ppSyncPoint)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_queue_create_with_native_handle_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_append_mem_buffer_read_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_append_mem_buffer_read_exp_params_t *params) { - os << ".hNativeQueue = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->phNativeQueue)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); os << ", "; - os << ".hContext = "; + os << ".hBuffer = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phBuffer)); os << ", "; - os << ".hDevice = "; + os << ".offset = "; - ur_params::serializePtr(os, *(params->phDevice)); + os << *(params->poffset); os << ", "; - os << ".pProperties = "; + os << ".size = "; - ur_params::serializePtr(os, *(params->ppProperties)); + os << *(params->psize); os << ", "; - 
os << ".phQueue = "; - - ur_params::serializePtr(os, *(params->pphQueue)); - - return os; -} + os << ".pDst = "; -inline std::ostream &operator<<(std::ostream &os, - const struct ur_queue_finish_params_t *params) { + ur::details::printPtr(os, + *(params->ppDst)); - os << ".hQueue = "; + os << ", "; + os << ".numSyncPointsInWaitList = "; - ur_params::serializePtr(os, *(params->phQueue)); + os << *(params->pnumSyncPointsInWaitList); - return os; -} + os << ", "; + os << ".pSyncPointWaitList = "; -inline std::ostream &operator<<(std::ostream &os, - const struct ur_queue_flush_params_t *params) { + ur::details::printPtr(os, + *(params->ppSyncPointWaitList)); - os << ".hQueue = "; + os << ", "; + os << ".pSyncPoint = "; - ur_params::serializePtr(os, *(params->phQueue)); + ur::details::printPtr(os, + *(params->ppSyncPoint)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_sampler_create_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_append_mem_buffer_copy_rect_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_append_mem_buffer_copy_rect_exp_params_t *params) { - os << ".hContext = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); os << ", "; - os << ".pDesc = "; + os << ".hSrcMem = "; - ur_params::serializePtr(os, *(params->ppDesc)); + ur::details::printPtr(os, + *(params->phSrcMem)); os << ", "; - os << ".phSampler = "; + os << ".hDstMem = "; - ur_params::serializePtr(os, *(params->pphSampler)); + ur::details::printPtr(os, + *(params->phDstMem)); - return os; -} + os << ", "; + os << ".srcOrigin = "; -inline std::ostream & -operator<<(std::ostream &os, const struct ur_sampler_retain_params_t *params) { + os << *(params->psrcOrigin); - os 
<< ".hSampler = "; + os << ", "; + os << ".dstOrigin = "; - ur_params::serializePtr(os, *(params->phSampler)); + os << *(params->pdstOrigin); - return os; -} + os << ", "; + os << ".region = "; -inline std::ostream & -operator<<(std::ostream &os, const struct ur_sampler_release_params_t *params) { + os << *(params->pregion); - os << ".hSampler = "; + os << ", "; + os << ".srcRowPitch = "; - ur_params::serializePtr(os, *(params->phSampler)); + os << *(params->psrcRowPitch); - return os; -} + os << ", "; + os << ".srcSlicePitch = "; -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_sampler_get_info_params_t *params) { + os << *(params->psrcSlicePitch); - os << ".hSampler = "; + os << ", "; + os << ".dstRowPitch = "; - ur_params::serializePtr(os, *(params->phSampler)); + os << *(params->pdstRowPitch); os << ", "; - os << ".propName = "; + os << ".dstSlicePitch = "; - os << *(params->ppropName); + os << *(params->pdstSlicePitch); os << ", "; - os << ".propSize = "; + os << ".numSyncPointsInWaitList = "; - os << *(params->ppropSize); + os << *(params->pnumSyncPointsInWaitList); os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + os << ".pSyncPointWaitList = "; + + ur::details::printPtr(os, + *(params->ppSyncPointWaitList)); os << ", "; - os << ".pPropSizeRet = "; + os << ".pSyncPoint = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->ppSyncPoint)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_sampler_get_native_handle_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_append_mem_buffer_write_rect_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct 
ur_command_buffer_append_mem_buffer_write_rect_exp_params_t *params) { - os << ".hSampler = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->phSampler)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); os << ", "; - os << ".phNativeSampler = "; + os << ".hBuffer = "; - ur_params::serializePtr(os, *(params->pphNativeSampler)); + ur::details::printPtr(os, + *(params->phBuffer)); - return os; -} + os << ", "; + os << ".bufferOffset = "; -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_sampler_create_with_native_handle_params_t *params) { + os << *(params->pbufferOffset); - os << ".hNativeSampler = "; + os << ", "; + os << ".hostOffset = "; - ur_params::serializePtr(os, *(params->phNativeSampler)); + os << *(params->phostOffset); os << ", "; - os << ".hContext = "; + os << ".region = "; - ur_params::serializePtr(os, *(params->phContext)); + os << *(params->pregion); os << ", "; - os << ".pProperties = "; + os << ".bufferRowPitch = "; - ur_params::serializePtr(os, *(params->ppProperties)); + os << *(params->pbufferRowPitch); os << ", "; - os << ".phSampler = "; + os << ".bufferSlicePitch = "; - ur_params::serializePtr(os, *(params->pphSampler)); + os << *(params->pbufferSlicePitch); - return os; -} + os << ", "; + os << ".hostRowPitch = "; -inline std::ostream & -operator<<(std::ostream &os, const struct ur_usm_host_alloc_params_t *params) { + os << *(params->phostRowPitch); - os << ".hContext = "; + os << ", "; + os << ".hostSlicePitch = "; - ur_params::serializePtr(os, *(params->phContext)); + os << *(params->phostSlicePitch); os << ", "; - os << ".pUSMDesc = "; + os << ".pSrc = "; - ur_params::serializePtr(os, *(params->ppUSMDesc)); + ur::details::printPtr(os, + *(params->ppSrc)); os << ", "; - os << ".pool = "; + os << ".numSyncPointsInWaitList = "; - ur_params::serializePtr(os, *(params->ppool)); + os << *(params->pnumSyncPointsInWaitList); os << ", "; - os << ".size = "; + os << ".pSyncPointWaitList = "; - 
os << *(params->psize); + ur::details::printPtr(os, + *(params->ppSyncPointWaitList)); os << ", "; - os << ".ppMem = "; + os << ".pSyncPoint = "; - ur_params::serializePtr(os, *(params->pppMem)); + ur::details::printPtr(os, + *(params->ppSyncPoint)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_device_alloc_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_append_mem_buffer_read_rect_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_append_mem_buffer_read_rect_exp_params_t *params) { - os << ".hContext = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); os << ", "; - os << ".hDevice = "; + os << ".hBuffer = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phBuffer)); os << ", "; - os << ".pUSMDesc = "; + os << ".bufferOffset = "; - ur_params::serializePtr(os, *(params->ppUSMDesc)); + os << *(params->pbufferOffset); os << ", "; - os << ".pool = "; + os << ".hostOffset = "; - ur_params::serializePtr(os, *(params->ppool)); + os << *(params->phostOffset); os << ", "; - os << ".size = "; + os << ".region = "; - os << *(params->psize); + os << *(params->pregion); os << ", "; - os << ".ppMem = "; - - ur_params::serializePtr(os, *(params->pppMem)); - - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_shared_alloc_params_t *params) { - - os << ".hContext = "; + os << ".bufferRowPitch = "; - ur_params::serializePtr(os, *(params->phContext)); + os << *(params->pbufferRowPitch); os << ", "; - os << ".hDevice = "; + os << ".bufferSlicePitch = "; - ur_params::serializePtr(os, *(params->phDevice)); + os << *(params->pbufferSlicePitch); os << ", "; - os << 
".pUSMDesc = "; + os << ".hostRowPitch = "; - ur_params::serializePtr(os, *(params->ppUSMDesc)); + os << *(params->phostRowPitch); os << ", "; - os << ".pool = "; + os << ".hostSlicePitch = "; - ur_params::serializePtr(os, *(params->ppool)); + os << *(params->phostSlicePitch); os << ", "; - os << ".size = "; + os << ".pDst = "; - os << *(params->psize); + ur::details::printPtr(os, + *(params->ppDst)); os << ", "; - os << ".ppMem = "; - - ur_params::serializePtr(os, *(params->pppMem)); - - return os; -} + os << ".numSyncPointsInWaitList = "; -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_free_params_t *params) { + os << *(params->pnumSyncPointsInWaitList); - os << ".hContext = "; + os << ", "; + os << ".pSyncPointWaitList = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->ppSyncPointWaitList)); os << ", "; - os << ".pMem = "; + os << ".pSyncPoint = "; - ur_params::serializePtr(os, *(params->ppMem)); + ur::details::printPtr(os, + *(params->ppSyncPoint)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_get_mem_alloc_info_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_append_mem_buffer_fill_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_append_mem_buffer_fill_exp_params_t *params) { - os << ".hContext = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); os << ", "; - os << ".pMem = "; + os << ".hBuffer = "; - ur_params::serializePtr(os, *(params->ppMem)); + ur::details::printPtr(os, + *(params->phBuffer)); os << ", "; - os << ".propName = "; + os << ".pPattern = "; - os << *(params->ppropName); + ur::details::printPtr(os, + *(params->ppPattern)); os << ", "; 
- os << ".propSize = "; - - os << *(params->ppropSize); + os << ".patternSize = "; - os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + os << *(params->ppatternSize); os << ", "; - os << ".pPropSizeRet = "; - - ur_params::serializePtr(os, *(params->ppPropSizeRet)); - - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, const struct ur_usm_pool_create_params_t *params) { - - os << ".hContext = "; + os << ".offset = "; - ur_params::serializePtr(os, *(params->phContext)); + os << *(params->poffset); os << ", "; - os << ".pPoolDesc = "; + os << ".size = "; - ur_params::serializePtr(os, *(params->ppPoolDesc)); + os << *(params->psize); os << ", "; - os << ".ppPool = "; + os << ".numSyncPointsInWaitList = "; - ur_params::serializePtr(os, *(params->pppPool)); + os << *(params->pnumSyncPointsInWaitList); - return os; -} + os << ", "; + os << ".pSyncPointWaitList = "; -inline std::ostream & -operator<<(std::ostream &os, const struct ur_usm_pool_retain_params_t *params) { + ur::details::printPtr(os, + *(params->ppSyncPointWaitList)); - os << ".pPool = "; + os << ", "; + os << ".pSyncPoint = "; - ur_params::serializePtr(os, *(params->ppPool)); + ur::details::printPtr(os, + *(params->ppSyncPoint)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_pool_release_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_append_usm_prefetch_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_append_usm_prefetch_exp_params_t *params) { - os << ".pPool = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->ppPool)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); - return os; -} + os << ", "; + os << ".pMemory = "; 
-inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_pool_get_info_params_t *params) { + ur::details::printPtr(os, + *(params->ppMemory)); - os << ".hPool = "; + os << ", "; + os << ".size = "; - ur_params::serializePtr(os, *(params->phPool)); + os << *(params->psize); os << ", "; - os << ".propName = "; + os << ".flags = "; - os << *(params->ppropName); + ur::details::printFlag(os, + *(params->pflags)); os << ", "; - os << ".propSize = "; + os << ".numSyncPointsInWaitList = "; - os << *(params->ppropSize); + os << *(params->pnumSyncPointsInWaitList); os << ", "; - os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + os << ".pSyncPointWaitList = "; + + ur::details::printPtr(os, + *(params->ppSyncPointWaitList)); os << ", "; - os << ".pPropSizeRet = "; + os << ".pSyncPoint = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->ppSyncPoint)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_pitched_alloc_exp_params_t *params) { - - os << ".hContext = "; - - ur_params::serializePtr(os, *(params->phContext)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_append_usm_advise_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_append_usm_advise_exp_params_t *params) { - os << ", "; - os << ".hDevice = "; - - ur_params::serializePtr(os, *(params->phDevice)); - - os << ", "; - os << ".pUSMDesc = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->ppUSMDesc)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); os << ", "; - os << ".pool = "; + os << ".pMemory = "; - ur_params::serializePtr(os, *(params->ppool)); + ur::details::printPtr(os, + *(params->ppMemory)); os << ", "; - os << 
".widthInBytes = "; + os << ".size = "; - os << *(params->pwidthInBytes); + os << *(params->psize); os << ", "; - os << ".height = "; + os << ".advice = "; - os << *(params->pheight); + ur::details::printFlag(os, + *(params->padvice)); os << ", "; - os << ".elementSizeBytes = "; + os << ".numSyncPointsInWaitList = "; - os << *(params->pelementSizeBytes); + os << *(params->pnumSyncPointsInWaitList); os << ", "; - os << ".ppMem = "; + os << ".pSyncPointWaitList = "; - ur_params::serializePtr(os, *(params->pppMem)); + ur::details::printPtr(os, + *(params->ppSyncPointWaitList)); os << ", "; - os << ".pResultPitch = "; + os << ".pSyncPoint = "; - ur_params::serializePtr(os, *(params->ppResultPitch)); + ur::details::printPtr(os, + *(params->ppSyncPoint)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_usm_import_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_enqueue_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_enqueue_exp_params_t *params) { - os << ".hContext = "; + os << ".hCommandBuffer = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phCommandBuffer)); os << ", "; - os << ".pMem = "; + os << ".hQueue = "; - ur_params::serializePtr(os, *(params->ppMem)); + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; - os << ".size = "; - - os << *(params->psize); - - return os; -} + os << ".numEventsInWaitList = "; -inline std::ostream & -operator<<(std::ostream &os, const struct ur_usm_release_exp_params_t *params) { + os << *(params->pnumEventsInWaitList); - os << ".hContext = "; + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - 
ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; os << ", "; - os << ".pMem = "; + os << ".phEvent = "; - ur_params::serializePtr(os, *(params->ppMem)); + ur::details::printPtr(os, + *(params->pphEvent)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_p2p_enable_peer_access_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_p2p_enable_peer_access_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_p2p_enable_peer_access_exp_params_t *params) { os << ".commandDevice = "; - ur_params::serializePtr(os, *(params->pcommandDevice)); + ur::details::printPtr(os, + *(params->pcommandDevice)); os << ", "; os << ".peerDevice = "; - ur_params::serializePtr(os, *(params->ppeerDevice)); + ur::details::printPtr(os, + *(params->ppeerDevice)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_p2p_disable_peer_access_exp_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_p2p_disable_peer_access_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_p2p_disable_peer_access_exp_params_t *params) { os << ".commandDevice = "; - ur_params::serializePtr(os, *(params->pcommandDevice)); + ur::details::printPtr(os, + *(params->pcommandDevice)); os << ", "; os << ".peerDevice = "; - ur_params::serializePtr(os, *(params->ppeerDevice)); + ur::details::printPtr(os, + *(params->ppeerDevice)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_p2p_peer_access_get_info_exp_params_t *params) { 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_p2p_peer_access_get_info_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_p2p_peer_access_get_info_exp_params_t *params) { os << ".commandDevice = "; - ur_params::serializePtr(os, *(params->pcommandDevice)); + ur::details::printPtr(os, + *(params->pcommandDevice)); os << ", "; os << ".peerDevice = "; - ur_params::serializePtr(os, *(params->ppeerDevice)); + ur::details::printPtr(os, + *(params->ppeerDevice)); os << ", "; os << ".propName = "; @@ -15008,29 +15302,62 @@ operator<<(std::ostream &os, os << ", "; os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); os << ", "; os << ".pPropSizeRet = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->ppPropSizeRet)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_loader_init_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_loader_init_params_t *params) { + + os << ".device_flags = "; + + ur::details::printFlag(os, + *(params->pdevice_flags)); + + os << ", "; + os << ".hLoaderConfig = "; + + ur::details::printPtr(os, + *(params->phLoaderConfig)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_virtual_mem_granularity_get_info_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_loader_tear_down_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, 
[[maybe_unused]] const struct ur_loader_tear_down_params_t *params) { + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_virtual_mem_granularity_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_virtual_mem_granularity_get_info_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; os << ".propName = "; @@ -15044,29 +15371,33 @@ operator<<(std::ostream &os, os << ", "; os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); os << ", "; os << ".pPropSizeRet = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_virtual_mem_reserve_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_virtual_mem_reserve_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_virtual_mem_reserve_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; os << ".pStart = "; - ur_params::serializePtr(os, *(params->ppStart)); + ur::details::printPtr(os, + *(params->ppStart)); os << ", "; os << ".size = "; @@ -15076,23 +15407,28 @@ operator<<(std::ostream &os, os << ", "; os << ".ppStart = "; - 
ur_params::serializePtr(os, *(params->pppStart)); + ur::details::printPtr(os, + *(params->pppStart)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_virtual_mem_free_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_virtual_mem_free_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_virtual_mem_free_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; os << ".pStart = "; - ur_params::serializePtr(os, *(params->ppStart)); + ur::details::printPtr(os, + *(params->ppStart)); os << ", "; os << ".size = "; @@ -15102,17 +15438,22 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_virtual_mem_map_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_virtual_mem_map_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_virtual_mem_map_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; os << ".pStart = "; - ur_params::serializePtr(os, *(params->ppStart)); + ur::details::printPtr(os, + *(params->ppStart)); os << ", "; os << ".size = "; @@ -15122,7 +15463,8 @@ operator<<(std::ostream &os, const struct ur_virtual_mem_map_params_t *params) { os << ", "; os << ".hPhysicalMem = "; - ur_params::serializePtr(os, *(params->phPhysicalMem)); + ur::details::printPtr(os, + *(params->phPhysicalMem)); os << ", "; os << ".offset = "; @@ -15132,24 +15474,28 @@ operator<<(std::ostream &os, const struct ur_virtual_mem_map_params_t *params) { os << ", "; os 
<< ".flags = "; - ur_params::serializeFlag(os, - *(params->pflags)); + ur::details::printFlag(os, + *(params->pflags)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_virtual_mem_unmap_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_virtual_mem_unmap_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_virtual_mem_unmap_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; os << ".pStart = "; - ur_params::serializePtr(os, *(params->ppStart)); + ur::details::printPtr(os, + *(params->ppStart)); os << ", "; os << ".size = "; @@ -15159,18 +15505,22 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_virtual_mem_set_access_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_virtual_mem_set_access_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_virtual_mem_set_access_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; os << ".pStart = "; - ur_params::serializePtr(os, *(params->ppStart)); + ur::details::printPtr(os, + *(params->ppStart)); os << ", "; os << ".size = "; @@ -15180,24 +15530,28 @@ operator<<(std::ostream &os, os << ", "; os << ".flags = "; - ur_params::serializeFlag(os, - *(params->pflags)); + ur::details::printFlag(os, + *(params->pflags)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_virtual_mem_get_info_params_t *params) { 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_virtual_mem_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_virtual_mem_get_info_params_t *params) { os << ".hContext = "; - ur_params::serializePtr(os, *(params->phContext)); + ur::details::printPtr(os, + *(params->phContext)); os << ", "; os << ".pStart = "; - ur_params::serializePtr(os, *(params->ppStart)); + ur::details::printPtr(os, + *(params->ppStart)); os << ", "; os << ".size = "; @@ -15216,23 +15570,27 @@ operator<<(std::ostream &os, os << ", "; os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); os << ", "; os << ".pPropSizeRet = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_device_get_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_get_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_device_get_params_t *params) { os << ".hPlatform = "; - ur_params::serializePtr(os, *(params->phPlatform)); + ur::details::printPtr(os, + *(params->phPlatform)); os << ", "; os << ".DeviceType = "; @@ -15246,30 +15604,35 @@ inline std::ostream &operator<<(std::ostream &os, os << ", "; os << ".phDevices = {"; - for (size_t i = 0; - *(params->pphDevices) != NULL && i < *params->pNumEntries; ++i) { + for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pNumEntries; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphDevices))[i]); 
+ ur::details::printPtr(os, + (*(params->pphDevices))[i]); } os << "}"; os << ", "; os << ".pNumDevices = "; - ur_params::serializePtr(os, *(params->ppNumDevices)); + ur::details::printPtr(os, + *(params->ppNumDevices)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_device_get_info_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_get_info_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_device_get_info_params_t *params) { os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; os << ".propName = "; @@ -15283,49 +15646,61 @@ operator<<(std::ostream &os, const struct ur_device_get_info_params_t *params) { os << ", "; os << ".pPropValue = "; - ur_params::serializeTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); os << ", "; os << ".pPropSizeRet = "; - ur_params::serializePtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, + *(params->ppPropSizeRet)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_device_retain_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_retain_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_device_retain_params_t *params) { os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phDevice)); return os; } -inline std::ostream & -operator<<(std::ostream &os, const struct ur_device_release_params_t *params) { 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_release_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_device_release_params_t *params) { os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phDevice)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_device_partition_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_partition_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_device_partition_params_t *params) { os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; os << ".pProperties = "; - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; os << ".NumDevices = "; @@ -15334,36 +15709,41 @@ operator<<(std::ostream &os, os << ", "; os << ".phSubDevices = {"; - for (size_t i = 0; - *(params->pphSubDevices) != NULL && i < *params->pNumDevices; ++i) { + for (size_t i = 0; *(params->pphSubDevices) != NULL && i < *params->pNumDevices; ++i) { if (i != 0) { os << ", "; } - ur_params::serializePtr(os, (*(params->pphSubDevices))[i]); + ur::details::printPtr(os, + (*(params->pphSubDevices))[i]); } os << "}"; os << ", "; os << ".pNumDevicesRet = "; - ur_params::serializePtr(os, *(params->ppNumDevicesRet)); + ur::details::printPtr(os, + *(params->ppNumDevicesRet)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_device_select_binary_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for 
the ur_device_select_binary_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_device_select_binary_params_t *params) { os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; os << ".pBinaries = "; - ur_params::serializePtr(os, *(params->ppBinaries)); + ur::details::printPtr(os, + *(params->ppBinaries)); os << ", "; os << ".NumBinaries = "; @@ -15373,240 +15753,166 @@ operator<<(std::ostream &os, os << ", "; os << ".pSelectedBinary = "; - ur_params::serializePtr(os, *(params->ppSelectedBinary)); + ur::details::printPtr(os, + *(params->ppSelectedBinary)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_device_get_native_handle_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_get_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_device_get_native_handle_params_t *params) { os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; os << ".phNativeDevice = "; - ur_params::serializePtr(os, *(params->pphNativeDevice)); + ur::details::printPtr(os, + *(params->pphNativeDevice)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_device_create_with_native_handle_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_create_with_native_handle_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_device_create_with_native_handle_params_t *params) { os << ".hNativeDevice = "; - ur_params::serializePtr(os, 
*(params->phNativeDevice)); + ur::details::printPtr(os, + *(params->phNativeDevice)); os << ", "; os << ".hPlatform = "; - ur_params::serializePtr(os, *(params->phPlatform)); + ur::details::printPtr(os, + *(params->phPlatform)); os << ", "; os << ".pProperties = "; - ur_params::serializePtr(os, *(params->ppProperties)); + ur::details::printPtr(os, + *(params->ppProperties)); os << ", "; os << ".phDevice = "; - ur_params::serializePtr(os, *(params->pphDevice)); + ur::details::printPtr(os, + *(params->pphDevice)); return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_device_get_global_timestamps_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_get_global_timestamps_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_device_get_global_timestamps_params_t *params) { os << ".hDevice = "; - ur_params::serializePtr(os, *(params->phDevice)); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; os << ".pDeviceTimestamp = "; - ur_params::serializePtr(os, *(params->ppDeviceTimestamp)); + ur::details::printPtr(os, + *(params->ppDeviceTimestamp)); os << ", "; os << ".pHostTimestamp = "; - ur_params::serializePtr(os, *(params->ppHostTimestamp)); + ur::details::printPtr(os, + *(params->ppHostTimestamp)); return os; } -namespace ur_params { - -template inline void serializePtr(std::ostream &os, T *ptr) { +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +// @brief Print pointer value +template +inline ur_result_t printPtr(std::ostream &os, const T *ptr) { if (ptr == nullptr) { os << "nullptr"; } else if constexpr (std::is_pointer_v) { - os << (void *)(ptr) << " ("; - serializePtr(os, *ptr); + os << (const void *)(ptr) << " ("; + printPtr(os, *ptr); os << ")"; } else if constexpr (std::is_void_v || is_handle_v) { 
- os << (void *)ptr; + os << (const void *)ptr; } else if constexpr (std::is_same_v, char>) { - os << (void *)(ptr) << " ("; + os << (const void *)(ptr) << " ("; os << ptr; os << ")"; } else { - os << (void *)(ptr) << " ("; + os << (const void *)(ptr) << " ("; os << *ptr; os << ")"; } + + return UR_RESULT_SUCCESS; } +} // namespace ur::details -inline int serializeFunctionParams(std::ostream &os, uint32_t function, - const void *params) { - switch ((enum ur_function_t)function) { - case UR_FUNCTION_INIT: { - os << (const struct ur_init_params_t *)params; - } break; - case UR_FUNCTION_TEAR_DOWN: { - os << (const struct ur_tear_down_params_t *)params; - } break; - case UR_FUNCTION_ADAPTER_GET: { - os << (const struct ur_adapter_get_params_t *)params; - } break; - case UR_FUNCTION_ADAPTER_RELEASE: { - os << (const struct ur_adapter_release_params_t *)params; - } break; - case UR_FUNCTION_ADAPTER_RETAIN: { - os << (const struct ur_adapter_retain_params_t *)params; - } break; - case UR_FUNCTION_ADAPTER_GET_LAST_ERROR: { - os << (const struct ur_adapter_get_last_error_params_t *)params; - } break; - case UR_FUNCTION_ADAPTER_GET_INFO: { - os << (const struct ur_adapter_get_info_params_t *)params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP: { - os << (const struct - ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *) - params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_SAMPLED_IMAGE_HANDLE_DESTROY_EXP: { - os << (const struct - ur_bindless_images_sampled_image_handle_destroy_exp_params_t *) - params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_IMAGE_ALLOCATE_EXP: { - os << (const struct ur_bindless_images_image_allocate_exp_params_t *) - params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_IMAGE_FREE_EXP: { - os << (const struct ur_bindless_images_image_free_exp_params_t *)params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_CREATE_EXP: { - os << (const struct - 
ur_bindless_images_unsampled_image_create_exp_params_t *)params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_SAMPLED_IMAGE_CREATE_EXP: { - os << (const struct ur_bindless_images_sampled_image_create_exp_params_t - *)params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_IMAGE_COPY_EXP: { - os << (const struct ur_bindless_images_image_copy_exp_params_t *)params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_IMAGE_GET_INFO_EXP: { - os << (const struct ur_bindless_images_image_get_info_exp_params_t *) - params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_MIPMAP_GET_LEVEL_EXP: { - os << (const struct ur_bindless_images_mipmap_get_level_exp_params_t *) - params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_MIPMAP_FREE_EXP: { - os << (const struct ur_bindless_images_mipmap_free_exp_params_t *) - params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_IMPORT_OPAQUE_FD_EXP: { - os << (const struct ur_bindless_images_import_opaque_fd_exp_params_t *) - params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_ARRAY_EXP: { - os << (const struct ur_bindless_images_map_external_array_exp_params_t - *)params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_RELEASE_INTEROP_EXP: { - os << (const struct ur_bindless_images_release_interop_exp_params_t *) - params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_IMPORT_EXTERNAL_SEMAPHORE_OPAQUE_FD_EXP: { - os << (const struct - ur_bindless_images_import_external_semaphore_opaque_fd_exp_params_t - *)params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_DESTROY_EXTERNAL_SEMAPHORE_EXP: { - os << (const struct - ur_bindless_images_destroy_external_semaphore_exp_params_t *) - params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_WAIT_EXTERNAL_SEMAPHORE_EXP: { - os << (const struct - ur_bindless_images_wait_external_semaphore_exp_params_t *)params; - } break; - case UR_FUNCTION_BINDLESS_IMAGES_SIGNAL_EXTERNAL_SEMAPHORE_EXP: { - os << (const struct - ur_bindless_images_signal_external_semaphore_exp_params_t *) - params; - } break; - 
case UR_FUNCTION_COMMAND_BUFFER_CREATE_EXP: { - os << (const struct ur_command_buffer_create_exp_params_t *)params; - } break; - case UR_FUNCTION_COMMAND_BUFFER_RETAIN_EXP: { - os << (const struct ur_command_buffer_retain_exp_params_t *)params; +namespace ur::extras { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print function parameters +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - `NULL == params` +inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_t function, const void *params) { + if (!params) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + switch (function) { + case UR_FUNCTION_LOADER_CONFIG_CREATE: { + os << (const struct ur_loader_config_create_params_t *)params; } break; - case UR_FUNCTION_COMMAND_BUFFER_RELEASE_EXP: { - os << (const struct ur_command_buffer_release_exp_params_t *)params; + case UR_FUNCTION_LOADER_CONFIG_RETAIN: { + os << (const struct ur_loader_config_retain_params_t *)params; } break; - case UR_FUNCTION_COMMAND_BUFFER_FINALIZE_EXP: { - os << (const struct ur_command_buffer_finalize_exp_params_t *)params; + case UR_FUNCTION_LOADER_CONFIG_RELEASE: { + os << (const struct ur_loader_config_release_params_t *)params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP: { - os << (const struct ur_command_buffer_append_kernel_launch_exp_params_t - *)params; + case UR_FUNCTION_LOADER_CONFIG_GET_INFO: { + os << (const struct ur_loader_config_get_info_params_t *)params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP: { - os << (const struct ur_command_buffer_append_memcpy_usm_exp_params_t *) - params; + case UR_FUNCTION_LOADER_CONFIG_ENABLE_LAYER: { + os << (const struct ur_loader_config_enable_layer_params_t *)params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP: { - os << (const struct 
ur_command_buffer_append_membuffer_copy_exp_params_t - *)params; + case UR_FUNCTION_LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK: { + os << (const struct ur_loader_config_set_code_location_callback_params_t *)params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP: { - os << (const struct - ur_command_buffer_append_membuffer_write_exp_params_t *)params; + case UR_FUNCTION_PLATFORM_GET: { + os << (const struct ur_platform_get_params_t *)params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP: { - os << (const struct ur_command_buffer_append_membuffer_read_exp_params_t - *)params; + case UR_FUNCTION_PLATFORM_GET_INFO: { + os << (const struct ur_platform_get_info_params_t *)params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP: { - os << (const struct - ur_command_buffer_append_membuffer_copy_rect_exp_params_t *) - params; + case UR_FUNCTION_PLATFORM_GET_NATIVE_HANDLE: { + os << (const struct ur_platform_get_native_handle_params_t *)params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP: { - os << (const struct - ur_command_buffer_append_membuffer_write_rect_exp_params_t *) - params; + case UR_FUNCTION_PLATFORM_CREATE_WITH_NATIVE_HANDLE: { + os << (const struct ur_platform_create_with_native_handle_params_t *)params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP: { - os << (const struct - ur_command_buffer_append_membuffer_read_rect_exp_params_t *) - params; + case UR_FUNCTION_PLATFORM_GET_API_VERSION: { + os << (const struct ur_platform_get_api_version_params_t *)params; } break; - case UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP: { - os << (const struct ur_command_buffer_enqueue_exp_params_t *)params; + case UR_FUNCTION_PLATFORM_GET_BACKEND_OPTION: { + os << (const struct ur_platform_get_backend_option_params_t *)params; } break; case UR_FUNCTION_CONTEXT_CREATE: { os << (const struct ur_context_create_params_t *)params; @@ -15620,93 +15926,14 @@ 
inline int serializeFunctionParams(std::ostream &os, uint32_t function, case UR_FUNCTION_CONTEXT_GET_INFO: { os << (const struct ur_context_get_info_params_t *)params; } break; - case UR_FUNCTION_CONTEXT_GET_NATIVE_HANDLE: { - os << (const struct ur_context_get_native_handle_params_t *)params; - } break; - case UR_FUNCTION_CONTEXT_CREATE_WITH_NATIVE_HANDLE: { - os << (const struct ur_context_create_with_native_handle_params_t *) - params; - } break; - case UR_FUNCTION_CONTEXT_SET_EXTENDED_DELETER: { - os << (const struct ur_context_set_extended_deleter_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH: { - os << (const struct ur_enqueue_kernel_launch_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_EVENTS_WAIT: { - os << (const struct ur_enqueue_events_wait_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER: { - os << (const struct ur_enqueue_events_wait_with_barrier_params_t *) - params; - } break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_READ: { - os << (const struct ur_enqueue_mem_buffer_read_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_WRITE: { - os << (const struct ur_enqueue_mem_buffer_write_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_READ_RECT: { - os << (const struct ur_enqueue_mem_buffer_read_rect_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_WRITE_RECT: { - os << (const struct ur_enqueue_mem_buffer_write_rect_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_COPY: { - os << (const struct ur_enqueue_mem_buffer_copy_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_COPY_RECT: { - os << (const struct ur_enqueue_mem_buffer_copy_rect_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_FILL: { - os << (const struct ur_enqueue_mem_buffer_fill_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_MEM_IMAGE_READ: { - os << (const struct ur_enqueue_mem_image_read_params_t *)params; - } 
break; - case UR_FUNCTION_ENQUEUE_MEM_IMAGE_WRITE: { - os << (const struct ur_enqueue_mem_image_write_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_MEM_IMAGE_COPY: { - os << (const struct ur_enqueue_mem_image_copy_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_MEM_BUFFER_MAP: { - os << (const struct ur_enqueue_mem_buffer_map_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_MEM_UNMAP: { - os << (const struct ur_enqueue_mem_unmap_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_USM_FILL: { - os << (const struct ur_enqueue_usm_fill_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_USM_MEMCPY: { - os << (const struct ur_enqueue_usm_memcpy_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_USM_PREFETCH: { - os << (const struct ur_enqueue_usm_prefetch_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_USM_ADVISE: { - os << (const struct ur_enqueue_usm_advise_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_USM_FILL_2D: { - os << (const struct ur_enqueue_usm_fill_2d_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_USM_MEMCPY_2D: { - os << (const struct ur_enqueue_usm_memcpy_2d_params_t *)params; - } break; - case UR_FUNCTION_ENQUEUE_DEVICE_GLOBAL_VARIABLE_WRITE: { - os << (const struct ur_enqueue_device_global_variable_write_params_t *) - params; - } break; - case UR_FUNCTION_ENQUEUE_DEVICE_GLOBAL_VARIABLE_READ: { - os << (const struct ur_enqueue_device_global_variable_read_params_t *) - params; + case UR_FUNCTION_CONTEXT_GET_NATIVE_HANDLE: { + os << (const struct ur_context_get_native_handle_params_t *)params; } break; - case UR_FUNCTION_ENQUEUE_READ_HOST_PIPE: { - os << (const struct ur_enqueue_read_host_pipe_params_t *)params; + case UR_FUNCTION_CONTEXT_CREATE_WITH_NATIVE_HANDLE: { + os << (const struct ur_context_create_with_native_handle_params_t *)params; } break; - case UR_FUNCTION_ENQUEUE_WRITE_HOST_PIPE: { - os << (const struct ur_enqueue_write_host_pipe_params_t *)params; + case 
UR_FUNCTION_CONTEXT_SET_EXTENDED_DELETER: { + os << (const struct ur_context_set_extended_deleter_params_t *)params; } break; case UR_FUNCTION_EVENT_GET_INFO: { os << (const struct ur_event_get_info_params_t *)params; @@ -15727,12 +15954,59 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, os << (const struct ur_event_get_native_handle_params_t *)params; } break; case UR_FUNCTION_EVENT_CREATE_WITH_NATIVE_HANDLE: { - os << (const struct ur_event_create_with_native_handle_params_t *) - params; + os << (const struct ur_event_create_with_native_handle_params_t *)params; } break; case UR_FUNCTION_EVENT_SET_CALLBACK: { os << (const struct ur_event_set_callback_params_t *)params; } break; + case UR_FUNCTION_PROGRAM_CREATE_WITH_IL: { + os << (const struct ur_program_create_with_il_params_t *)params; + } break; + case UR_FUNCTION_PROGRAM_CREATE_WITH_BINARY: { + os << (const struct ur_program_create_with_binary_params_t *)params; + } break; + case UR_FUNCTION_PROGRAM_BUILD: { + os << (const struct ur_program_build_params_t *)params; + } break; + case UR_FUNCTION_PROGRAM_BUILD_EXP: { + os << (const struct ur_program_build_exp_params_t *)params; + } break; + case UR_FUNCTION_PROGRAM_COMPILE: { + os << (const struct ur_program_compile_params_t *)params; + } break; + case UR_FUNCTION_PROGRAM_COMPILE_EXP: { + os << (const struct ur_program_compile_exp_params_t *)params; + } break; + case UR_FUNCTION_PROGRAM_LINK: { + os << (const struct ur_program_link_params_t *)params; + } break; + case UR_FUNCTION_PROGRAM_LINK_EXP: { + os << (const struct ur_program_link_exp_params_t *)params; + } break; + case UR_FUNCTION_PROGRAM_RETAIN: { + os << (const struct ur_program_retain_params_t *)params; + } break; + case UR_FUNCTION_PROGRAM_RELEASE: { + os << (const struct ur_program_release_params_t *)params; + } break; + case UR_FUNCTION_PROGRAM_GET_FUNCTION_POINTER: { + os << (const struct ur_program_get_function_pointer_params_t *)params; + } break; + case 
UR_FUNCTION_PROGRAM_GET_INFO: { + os << (const struct ur_program_get_info_params_t *)params; + } break; + case UR_FUNCTION_PROGRAM_GET_BUILD_INFO: { + os << (const struct ur_program_get_build_info_params_t *)params; + } break; + case UR_FUNCTION_PROGRAM_SET_SPECIALIZATION_CONSTANTS: { + os << (const struct ur_program_set_specialization_constants_params_t *)params; + } break; + case UR_FUNCTION_PROGRAM_GET_NATIVE_HANDLE: { + os << (const struct ur_program_get_native_handle_params_t *)params; + } break; + case UR_FUNCTION_PROGRAM_CREATE_WITH_NATIVE_HANDLE: { + os << (const struct ur_program_create_with_native_handle_params_t *)params; + } break; case UR_FUNCTION_KERNEL_CREATE: { os << (const struct ur_kernel_create_params_t *)params; } break; @@ -15755,8 +16029,7 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, os << (const struct ur_kernel_get_native_handle_params_t *)params; } break; case UR_FUNCTION_KERNEL_CREATE_WITH_NATIVE_HANDLE: { - os << (const struct ur_kernel_create_with_native_handle_params_t *) - params; + os << (const struct ur_kernel_create_with_native_handle_params_t *)params; } break; case UR_FUNCTION_KERNEL_SET_ARG_VALUE: { os << (const struct ur_kernel_set_arg_value_params_t *)params; @@ -15777,23 +16050,28 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, os << (const struct ur_kernel_set_arg_mem_obj_params_t *)params; } break; case UR_FUNCTION_KERNEL_SET_SPECIALIZATION_CONSTANTS: { - os << (const struct ur_kernel_set_specialization_constants_params_t *) - params; + os << (const struct ur_kernel_set_specialization_constants_params_t *)params; } break; - case UR_FUNCTION_LOADER_CONFIG_CREATE: { - os << (const struct ur_loader_config_create_params_t *)params; + case UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP: { + os << (const struct ur_kernel_suggest_max_cooperative_group_count_exp_params_t *)params; } break; - case UR_FUNCTION_LOADER_CONFIG_RETAIN: { - os << (const struct 
ur_loader_config_retain_params_t *)params; + case UR_FUNCTION_SAMPLER_CREATE: { + os << (const struct ur_sampler_create_params_t *)params; } break; - case UR_FUNCTION_LOADER_CONFIG_RELEASE: { - os << (const struct ur_loader_config_release_params_t *)params; + case UR_FUNCTION_SAMPLER_RETAIN: { + os << (const struct ur_sampler_retain_params_t *)params; } break; - case UR_FUNCTION_LOADER_CONFIG_GET_INFO: { - os << (const struct ur_loader_config_get_info_params_t *)params; + case UR_FUNCTION_SAMPLER_RELEASE: { + os << (const struct ur_sampler_release_params_t *)params; } break; - case UR_FUNCTION_LOADER_CONFIG_ENABLE_LAYER: { - os << (const struct ur_loader_config_enable_layer_params_t *)params; + case UR_FUNCTION_SAMPLER_GET_INFO: { + os << (const struct ur_sampler_get_info_params_t *)params; + } break; + case UR_FUNCTION_SAMPLER_GET_NATIVE_HANDLE: { + os << (const struct ur_sampler_get_native_handle_params_t *)params; + } break; + case UR_FUNCTION_SAMPLER_CREATE_WITH_NATIVE_HANDLE: { + os << (const struct ur_sampler_create_with_native_handle_params_t *)params; } break; case UR_FUNCTION_MEM_IMAGE_CREATE: { os << (const struct ur_mem_image_create_params_t *)params; @@ -15814,12 +16092,10 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, os << (const struct ur_mem_get_native_handle_params_t *)params; } break; case UR_FUNCTION_MEM_BUFFER_CREATE_WITH_NATIVE_HANDLE: { - os << (const struct ur_mem_buffer_create_with_native_handle_params_t *) - params; + os << (const struct ur_mem_buffer_create_with_native_handle_params_t *)params; } break; case UR_FUNCTION_MEM_IMAGE_CREATE_WITH_NATIVE_HANDLE: { - os << (const struct ur_mem_image_create_with_native_handle_params_t *) - params; + os << (const struct ur_mem_image_create_with_native_handle_params_t *)params; } break; case UR_FUNCTION_MEM_GET_INFO: { os << (const struct ur_mem_get_info_params_t *)params; @@ -15836,74 +16112,98 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, 
case UR_FUNCTION_PHYSICAL_MEM_RELEASE: { os << (const struct ur_physical_mem_release_params_t *)params; } break; - case UR_FUNCTION_PLATFORM_GET: { - os << (const struct ur_platform_get_params_t *)params; + case UR_FUNCTION_ADAPTER_GET: { + os << (const struct ur_adapter_get_params_t *)params; } break; - case UR_FUNCTION_PLATFORM_GET_INFO: { - os << (const struct ur_platform_get_info_params_t *)params; + case UR_FUNCTION_ADAPTER_RELEASE: { + os << (const struct ur_adapter_release_params_t *)params; } break; - case UR_FUNCTION_PLATFORM_GET_NATIVE_HANDLE: { - os << (const struct ur_platform_get_native_handle_params_t *)params; + case UR_FUNCTION_ADAPTER_RETAIN: { + os << (const struct ur_adapter_retain_params_t *)params; } break; - case UR_FUNCTION_PLATFORM_CREATE_WITH_NATIVE_HANDLE: { - os << (const struct ur_platform_create_with_native_handle_params_t *) - params; + case UR_FUNCTION_ADAPTER_GET_LAST_ERROR: { + os << (const struct ur_adapter_get_last_error_params_t *)params; } break; - case UR_FUNCTION_PLATFORM_GET_API_VERSION: { - os << (const struct ur_platform_get_api_version_params_t *)params; + case UR_FUNCTION_ADAPTER_GET_INFO: { + os << (const struct ur_adapter_get_info_params_t *)params; } break; - case UR_FUNCTION_PLATFORM_GET_BACKEND_OPTION: { - os << (const struct ur_platform_get_backend_option_params_t *)params; + case UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH: { + os << (const struct ur_enqueue_kernel_launch_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_CREATE_WITH_IL: { - os << (const struct ur_program_create_with_il_params_t *)params; + case UR_FUNCTION_ENQUEUE_EVENTS_WAIT: { + os << (const struct ur_enqueue_events_wait_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_CREATE_WITH_BINARY: { - os << (const struct ur_program_create_with_binary_params_t *)params; + case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER: { + os << (const struct ur_enqueue_events_wait_with_barrier_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_BUILD: { - os << 
(const struct ur_program_build_params_t *)params; + case UR_FUNCTION_ENQUEUE_MEM_BUFFER_READ: { + os << (const struct ur_enqueue_mem_buffer_read_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_BUILD_EXP: { - os << (const struct ur_program_build_exp_params_t *)params; + case UR_FUNCTION_ENQUEUE_MEM_BUFFER_WRITE: { + os << (const struct ur_enqueue_mem_buffer_write_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_COMPILE: { - os << (const struct ur_program_compile_params_t *)params; + case UR_FUNCTION_ENQUEUE_MEM_BUFFER_READ_RECT: { + os << (const struct ur_enqueue_mem_buffer_read_rect_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_COMPILE_EXP: { - os << (const struct ur_program_compile_exp_params_t *)params; + case UR_FUNCTION_ENQUEUE_MEM_BUFFER_WRITE_RECT: { + os << (const struct ur_enqueue_mem_buffer_write_rect_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_LINK: { - os << (const struct ur_program_link_params_t *)params; + case UR_FUNCTION_ENQUEUE_MEM_BUFFER_COPY: { + os << (const struct ur_enqueue_mem_buffer_copy_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_LINK_EXP: { - os << (const struct ur_program_link_exp_params_t *)params; + case UR_FUNCTION_ENQUEUE_MEM_BUFFER_COPY_RECT: { + os << (const struct ur_enqueue_mem_buffer_copy_rect_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_RETAIN: { - os << (const struct ur_program_retain_params_t *)params; + case UR_FUNCTION_ENQUEUE_MEM_BUFFER_FILL: { + os << (const struct ur_enqueue_mem_buffer_fill_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_RELEASE: { - os << (const struct ur_program_release_params_t *)params; + case UR_FUNCTION_ENQUEUE_MEM_IMAGE_READ: { + os << (const struct ur_enqueue_mem_image_read_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_GET_FUNCTION_POINTER: { - os << (const struct ur_program_get_function_pointer_params_t *)params; + case UR_FUNCTION_ENQUEUE_MEM_IMAGE_WRITE: { + os << (const struct ur_enqueue_mem_image_write_params_t *)params; } 
break; - case UR_FUNCTION_PROGRAM_GET_INFO: { - os << (const struct ur_program_get_info_params_t *)params; + case UR_FUNCTION_ENQUEUE_MEM_IMAGE_COPY: { + os << (const struct ur_enqueue_mem_image_copy_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_GET_BUILD_INFO: { - os << (const struct ur_program_get_build_info_params_t *)params; + case UR_FUNCTION_ENQUEUE_MEM_BUFFER_MAP: { + os << (const struct ur_enqueue_mem_buffer_map_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_SET_SPECIALIZATION_CONSTANTS: { - os << (const struct ur_program_set_specialization_constants_params_t *) - params; + case UR_FUNCTION_ENQUEUE_MEM_UNMAP: { + os << (const struct ur_enqueue_mem_unmap_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_GET_NATIVE_HANDLE: { - os << (const struct ur_program_get_native_handle_params_t *)params; + case UR_FUNCTION_ENQUEUE_USM_FILL: { + os << (const struct ur_enqueue_usm_fill_params_t *)params; } break; - case UR_FUNCTION_PROGRAM_CREATE_WITH_NATIVE_HANDLE: { - os << (const struct ur_program_create_with_native_handle_params_t *) - params; + case UR_FUNCTION_ENQUEUE_USM_MEMCPY: { + os << (const struct ur_enqueue_usm_memcpy_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_USM_PREFETCH: { + os << (const struct ur_enqueue_usm_prefetch_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_USM_ADVISE: { + os << (const struct ur_enqueue_usm_advise_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_USM_FILL_2D: { + os << (const struct ur_enqueue_usm_fill_2d_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_USM_MEMCPY_2D: { + os << (const struct ur_enqueue_usm_memcpy_2d_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_DEVICE_GLOBAL_VARIABLE_WRITE: { + os << (const struct ur_enqueue_device_global_variable_write_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_DEVICE_GLOBAL_VARIABLE_READ: { + os << (const struct ur_enqueue_device_global_variable_read_params_t *)params; + } break; + case 
UR_FUNCTION_ENQUEUE_READ_HOST_PIPE: { + os << (const struct ur_enqueue_read_host_pipe_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_WRITE_HOST_PIPE: { + os << (const struct ur_enqueue_write_host_pipe_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: { + os << (const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *)params; } break; case UR_FUNCTION_QUEUE_GET_INFO: { os << (const struct ur_queue_get_info_params_t *)params; @@ -15921,8 +16221,7 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, os << (const struct ur_queue_get_native_handle_params_t *)params; } break; case UR_FUNCTION_QUEUE_CREATE_WITH_NATIVE_HANDLE: { - os << (const struct ur_queue_create_with_native_handle_params_t *) - params; + os << (const struct ur_queue_create_with_native_handle_params_t *)params; } break; case UR_FUNCTION_QUEUE_FINISH: { os << (const struct ur_queue_finish_params_t *)params; @@ -15930,24 +16229,56 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, case UR_FUNCTION_QUEUE_FLUSH: { os << (const struct ur_queue_flush_params_t *)params; } break; - case UR_FUNCTION_SAMPLER_CREATE: { - os << (const struct ur_sampler_create_params_t *)params; + case UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP: { + os << (const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *)params; } break; - case UR_FUNCTION_SAMPLER_RETAIN: { - os << (const struct ur_sampler_retain_params_t *)params; + case UR_FUNCTION_BINDLESS_IMAGES_SAMPLED_IMAGE_HANDLE_DESTROY_EXP: { + os << (const struct ur_bindless_images_sampled_image_handle_destroy_exp_params_t *)params; } break; - case UR_FUNCTION_SAMPLER_RELEASE: { - os << (const struct ur_sampler_release_params_t *)params; + case UR_FUNCTION_BINDLESS_IMAGES_IMAGE_ALLOCATE_EXP: { + os << (const struct ur_bindless_images_image_allocate_exp_params_t *)params; } break; - case UR_FUNCTION_SAMPLER_GET_INFO: { - os << (const struct 
ur_sampler_get_info_params_t *)params; + case UR_FUNCTION_BINDLESS_IMAGES_IMAGE_FREE_EXP: { + os << (const struct ur_bindless_images_image_free_exp_params_t *)params; } break; - case UR_FUNCTION_SAMPLER_GET_NATIVE_HANDLE: { - os << (const struct ur_sampler_get_native_handle_params_t *)params; + case UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_CREATE_EXP: { + os << (const struct ur_bindless_images_unsampled_image_create_exp_params_t *)params; } break; - case UR_FUNCTION_SAMPLER_CREATE_WITH_NATIVE_HANDLE: { - os << (const struct ur_sampler_create_with_native_handle_params_t *) - params; + case UR_FUNCTION_BINDLESS_IMAGES_SAMPLED_IMAGE_CREATE_EXP: { + os << (const struct ur_bindless_images_sampled_image_create_exp_params_t *)params; + } break; + case UR_FUNCTION_BINDLESS_IMAGES_IMAGE_COPY_EXP: { + os << (const struct ur_bindless_images_image_copy_exp_params_t *)params; + } break; + case UR_FUNCTION_BINDLESS_IMAGES_IMAGE_GET_INFO_EXP: { + os << (const struct ur_bindless_images_image_get_info_exp_params_t *)params; + } break; + case UR_FUNCTION_BINDLESS_IMAGES_MIPMAP_GET_LEVEL_EXP: { + os << (const struct ur_bindless_images_mipmap_get_level_exp_params_t *)params; + } break; + case UR_FUNCTION_BINDLESS_IMAGES_MIPMAP_FREE_EXP: { + os << (const struct ur_bindless_images_mipmap_free_exp_params_t *)params; + } break; + case UR_FUNCTION_BINDLESS_IMAGES_IMPORT_OPAQUE_FD_EXP: { + os << (const struct ur_bindless_images_import_opaque_fd_exp_params_t *)params; + } break; + case UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_ARRAY_EXP: { + os << (const struct ur_bindless_images_map_external_array_exp_params_t *)params; + } break; + case UR_FUNCTION_BINDLESS_IMAGES_RELEASE_INTEROP_EXP: { + os << (const struct ur_bindless_images_release_interop_exp_params_t *)params; + } break; + case UR_FUNCTION_BINDLESS_IMAGES_IMPORT_EXTERNAL_SEMAPHORE_OPAQUE_FD_EXP: { + os << (const struct ur_bindless_images_import_external_semaphore_opaque_fd_exp_params_t *)params; + } break; + case 
UR_FUNCTION_BINDLESS_IMAGES_DESTROY_EXTERNAL_SEMAPHORE_EXP: { + os << (const struct ur_bindless_images_destroy_external_semaphore_exp_params_t *)params; + } break; + case UR_FUNCTION_BINDLESS_IMAGES_WAIT_EXTERNAL_SEMAPHORE_EXP: { + os << (const struct ur_bindless_images_wait_external_semaphore_exp_params_t *)params; + } break; + case UR_FUNCTION_BINDLESS_IMAGES_SIGNAL_EXTERNAL_SEMAPHORE_EXP: { + os << (const struct ur_bindless_images_signal_external_semaphore_exp_params_t *)params; } break; case UR_FUNCTION_USM_HOST_ALLOC: { os << (const struct ur_usm_host_alloc_params_t *)params; @@ -15985,20 +16316,74 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, case UR_FUNCTION_USM_RELEASE_EXP: { os << (const struct ur_usm_release_exp_params_t *)params; } break; + case UR_FUNCTION_COMMAND_BUFFER_CREATE_EXP: { + os << (const struct ur_command_buffer_create_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_RETAIN_EXP: { + os << (const struct ur_command_buffer_retain_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_RELEASE_EXP: { + os << (const struct ur_command_buffer_release_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_FINALIZE_EXP: { + os << (const struct ur_command_buffer_finalize_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP: { + os << (const struct ur_command_buffer_append_kernel_launch_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP: { + os << (const struct ur_command_buffer_append_usm_memcpy_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP: { + os << (const struct ur_command_buffer_append_usm_fill_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP: { + os << (const struct ur_command_buffer_append_mem_buffer_copy_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP: { + os << 
(const struct ur_command_buffer_append_mem_buffer_write_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP: { + os << (const struct ur_command_buffer_append_mem_buffer_read_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP: { + os << (const struct ur_command_buffer_append_mem_buffer_copy_rect_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP: { + os << (const struct ur_command_buffer_append_mem_buffer_write_rect_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP: { + os << (const struct ur_command_buffer_append_mem_buffer_read_rect_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP: { + os << (const struct ur_command_buffer_append_mem_buffer_fill_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP: { + os << (const struct ur_command_buffer_append_usm_prefetch_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP: { + os << (const struct ur_command_buffer_append_usm_advise_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP: { + os << (const struct ur_command_buffer_enqueue_exp_params_t *)params; + } break; case UR_FUNCTION_USM_P2P_ENABLE_PEER_ACCESS_EXP: { os << (const struct ur_usm_p2p_enable_peer_access_exp_params_t *)params; } break; case UR_FUNCTION_USM_P2P_DISABLE_PEER_ACCESS_EXP: { - os << (const struct ur_usm_p2p_disable_peer_access_exp_params_t *) - params; + os << (const struct ur_usm_p2p_disable_peer_access_exp_params_t *)params; } break; case UR_FUNCTION_USM_P2P_PEER_ACCESS_GET_INFO_EXP: { - os << (const struct ur_usm_p2p_peer_access_get_info_exp_params_t *) - params; + os << (const struct ur_usm_p2p_peer_access_get_info_exp_params_t *)params; + } break; + case UR_FUNCTION_LOADER_INIT: { + os << (const struct 
ur_loader_init_params_t *)params; + } break; + case UR_FUNCTION_LOADER_TEAR_DOWN: { + os << (const struct ur_loader_tear_down_params_t *)params; } break; case UR_FUNCTION_VIRTUAL_MEM_GRANULARITY_GET_INFO: { - os << (const struct ur_virtual_mem_granularity_get_info_params_t *) - params; + os << (const struct ur_virtual_mem_granularity_get_info_params_t *)params; } break; case UR_FUNCTION_VIRTUAL_MEM_RESERVE: { os << (const struct ur_virtual_mem_reserve_params_t *)params; @@ -16040,17 +16425,16 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, os << (const struct ur_device_get_native_handle_params_t *)params; } break; case UR_FUNCTION_DEVICE_CREATE_WITH_NATIVE_HANDLE: { - os << (const struct ur_device_create_with_native_handle_params_t *) - params; + os << (const struct ur_device_create_with_native_handle_params_t *)params; } break; case UR_FUNCTION_DEVICE_GET_GLOBAL_TIMESTAMPS: { os << (const struct ur_device_get_global_timestamps_params_t *)params; } break; default: - return -1; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } - return 0; + return UR_RESULT_SUCCESS; } -} // namespace ur_params +} // namespace ur::extras -#endif /* UR_PARAMS_HPP */ +#endif /* UR_PRINT_HPP */ diff --git a/scripts/Doxyfile b/scripts/Doxyfile index c038d5276d..0340076c7a 100644 --- a/scripts/Doxyfile +++ b/scripts/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "Intel One API Unified Runtime API" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = v0.7 +PROJECT_NUMBER = v0.9 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a @@ -848,7 +848,8 @@ RECURSIVE = YES # Note that relative paths are relative to the directory from which doxygen is # run. 
-EXCLUDE = README.md +EXCLUDE = README.md \ + ../include/ur_print.hpp # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded diff --git a/scripts/YaML.md b/scripts/YaML.md index 291e4263c7..ee22cd39d6 100644 --- a/scripts/YaML.md +++ b/scripts/YaML.md @@ -616,13 +616,18 @@ class ur_name_t(Structure): - `out` is used for params that are write-only; if the param is a pointer, then the memory being pointed to is also write-only - `in,out` is used for params that are both read and write; typically this is used for pointers to other data structures that contain both read and write params - `nocheck` is used to specify that no additional validation checks will be generated. - + `desc` may include one the following annotations: {`"[optional]"`, `"[range(start,end)]"`, `"[release]"`, `"[typename(typeVarName)]"`} + + `desc` may include one of the following annotations: {`"[optional]"`, `"[range(start,end)]"`, `"[release]"`, `"[typename(typeVarName)]"`, `"[bounds(offset,size)]"`} - `optional` is used for params that are handles or pointers where it is legal for the value to be `nullptr` - `range` is used for params that are array pointers to specify the valid range that the is valid to read + `start` and `end` must be an ISO-C standard identifier or literal + `start` is inclusive and `end` is exclusive - `release` is used for params that are handles or pointers to handles where the function will destroy any backing memory associated with the handle(s) - `typename` is used to denote the type enum for params that are opaque pointers to values of tagged data types. + - `bounds` is used for params that are memory objects or USM allocations. It specifies the range within the memory allocation represented by the param that will be accessed by the operation. 
+ + `offset` and `size` must be an ISO-C standard identifier or literal + + The sum of `offset` and `size` will be compared against the size of the memory allocation represented by the param. + + If `offset` and `size` are not both integers they must be of the types `$x_rect_offset` and `$x_rect_region` respectively. + + If `bounds` is used the operation must also take a parameter of type `$x_queue_handle_t` + `type` must be an ISO-C standard identifier + `name` must be a unique ISO-C standard identifier - A param may take the following optional scalar field: {`init`, `version`} diff --git a/scripts/ci.py b/scripts/ci.py deleted file mode 100644 index 05a9141248..0000000000 --- a/scripts/ci.py +++ /dev/null @@ -1,80 +0,0 @@ -import os -import sys -import argparse -import re -import fileinput -from distutils import dir_util -import util - -script_dir = os.path.dirname(os.path.abspath(__file__)) -root_dir = os.path.dirname(script_dir) - - -""" -Entry-point: - publishes HTML for GitLab pages -""" -def publish_gitlab_html(): - src_html_dir = os.path.join(root_dir, "docs", "html") - src_img_dir = os.path.join(root_dir, "images") - tmp_dir = os.path.join(root_dir, ".public") - tmp_img_dir = os.path.join(root_dir, ".public/images") - publishing_dir = os.path.join(root_dir, "public") - - # Remove dest dirs - if os.path.exists(tmp_dir): - print("Deleting temp dir: %s" % tmp_dir) - util.removePath(tmp_dir) - if os.path.exists(publishing_dir): - print("Deleting publishing dir: %s" % publishing_dir) - util.removePath(publishing_dir) - - # Copy over generated content to new folder - print("Copying html files from '%s' to '%s'" % (src_html_dir, tmp_dir)) - dir_util.copy_tree(src_html_dir, tmp_dir) - - # Fixes html files by converting paths relative to root html folder instead of repo - print("Fixing paths in html files in '%s' to be relative to root..." 
% (tmp_dir)) - regex_pattern = re.compile(r'\.\.[\/|\\]images') - files = util.findFiles(tmp_dir, "*.html") - print("Found %s files" % (len(files))) - with fileinput.FileInput(files=files, inplace=True) as f: - for line in f: - print(re.sub(regex_pattern, './images', line), end='') - - # Publish new folder to GitLab Pages folder (/public) - print("Publishing to GitLab pages by renaming '%s' to '%s'" % (tmp_dir, publishing_dir)) - os.rename(tmp_dir, publishing_dir) - - -""" -Entry-point: - main() -""" -def main(args=sys.argv[1:]): - # Define args - parser = argparse.ArgumentParser() - parser.add_argument( - "--publish-html", - help="Publish html", - action="store_true") - - # Parse args - options = parser.parse_args(args) - - # Publish GitLab html - if options.publish_html: - try: - publish_gitlab_html() - except Exception as e: - print(e) - print("Failed") - return 1 - - print("Done") - return 0 - - -if __name__ == '__main__': - sys.exit(main()) -# END OF FILE diff --git a/scripts/core/CONTRIB.rst b/scripts/core/CONTRIB.rst index f40307e34e..cf2c8e870b 100644 --- a/scripts/core/CONTRIB.rst +++ b/scripts/core/CONTRIB.rst @@ -13,9 +13,19 @@ accepted into the project. .. important:: - Before making a contribution you *should* determine if the change should be - made directly to the core specification or introduced as an experimental - feature. The criteria we use to make this distinction are as follows: + Any contributions that fall into the following criteria *must* follow the + `Adapter Change Process`_: + + * Changing the API/ABI of the specification and or loader. + + * Changing the implementation of an adapter. + + * Changing the implementation of shared/common code used by an adapter. + + Before making a contribution to the specification you *should* determine if + the change should be made directly to the core specification or introduced + as an experimental feature. 
The criteria we use to make this distinction + are as follows: * The feature exists to enable an experimental feature in a parallel language runtime being built on top of Unified Runtime. @@ -39,6 +49,114 @@ accepted into the project. Runtime team via the `GitHub issue tracker `_. +Adapter Change Process +====================== + +1. Create a pull request containing the adapter changes in the + `oneapi-src/unified-runtime`_ project targeting the `main + `_ branch. + +2. Create a draft pull request in the `intel/llvm`_ project to take advantage + of the pre-merge testing. Add any required implementation changes in + addition to changing: + + * `UNIFIED_RUNTIME_REPO`_ to point at your fork of Unified Runtime. + + * `UNIFIED_RUNTIME_TAG`_ to point at your development branch name used to + create the Unified Runtime pull request in step 1. + +3. Add a comment in the *oneapi-src/unified-runtime* pull request linking to + the *intel/llvm* pull request created in step 2. + +4. Code reviews for the adapter changes are carried out in the + *oneapi-src/unified-runtime* pull request. + +5. Any new commits to the *oneapi-src/unified-runtime* pull request *must* be + accompanied by a corresponding update in the *intel/llvm* pull request as + indicated in step 2, so the testing is always up-to-date. + +6. The Unified Runtime maintainers *must* ensure that step 5 has been carried + out and that all pre-merge testing has passed before accepting the + *oneapi-src/unified-runtime* pull request. + +7. Once the *oneapi-src/unified-runtime* pull request is accepted: + + * Reverse the change to `UNIFIED_RUNTIME_REPO`_ made in step 2. + * Update the `UNIFIED_RUNTIME_TAG`_ to point at the + *oneapi-src/unified-runtime* commit/tag containing the merged adapter + changes. + * Update the pull request description, linking to any other *intel/llvm* + pull requests whose changes have been merged into + *oneapi-src/unified-runtime* but have not yet been merged into + *intel/llvm*. 
+ * Mark the *intel/llvm* pull request as ready for review and follow their + review process. + +.. _oneapi-src/unified-runtime: + https://github.com/oneapi-src/unified-runtime +.. _intel/llvm: + https://github.com/intel/llvm +.. _UNIFIED_RUNTIME_REPO: + https://github.com/intel/llvm/blob/sycl/sycl/plugins/unified_runtime/CMakeLists.txt#L7 +.. _UNIFIED_RUNTIME_TAG: + https://github.com/intel/llvm/blob/sycl/sycl/plugins/unified_runtime/CMakeLists.txt#L8 + +Build Environment +================= + +To be able to generate the source from the YAML files, the build environment +must be configured correctly and all dependencies must be installed. The +instructions for a basic setup are available in the `README +`_. + +The following additional dependencies are required to support the ``generate`` +target: + +* Doxygen (>= 1.8) + +* The Python script requirements listed in `thirdparty/requirements.txt`_ + +Doxygen can be installed via your system's package manager, e.g. on Ubuntu +``sudo apt install doxygen``, or by downloading it from the Doxygen website. It +must be available on the current ``PATH`` when the script is run. + +One way to install the requirements for the script is using a Python virtual +environment. This can be set up by running the following commands from the +project root: + +.. code-block:: console + + $ python3 -m venv .local + $ source .local/bin/activate + $ pip install -r third_party/requirements.txt + +The virtual environment can be subsequently reactivated before any builds +without needing to reinstall the requirements: + +.. code-block:: console + + $ source .local/bin/activate + +Alternatively, a Docker container can be used instead of a virtual environment. +Instructions on building and using a Docker image can be found in +`.github/docker`_ + +You *must* also enable the ``UR_FORMAT_CPP_STYLE`` CMake option to allow +formatting of the generated code, or the ``generate`` target will not be +available. + +.. 
code-block:: console + + $ cmake build/ -DUR_FORMAT_CPP_STYLE=ON + +You can then follow the instructions below to use the ``generate`` target to +regenerate the source. + +.. _thirdparty/requirements.txt: + https://github.com/oneapi-src/unified-runtime/blob/main/third_party/requirements.txt +.. _.github/docker: + https://github.com/oneapi-src/unified-runtime/blob/main/.github/docker + Generating Source ================= @@ -46,10 +164,9 @@ The specification and many other components in the Unified Runtime repository are generated from a set of YAML_ files which are used as inputs to a Mako_ based templating system. The YAML file syntax is defined in `YAML syntax`_. To generate the outputs of the Mako templates a build directory must be -configured, instructions are available in the `README -`_ file. -Upon successfully configuring a build directory, generate the outputs with the -following command (or suitable build system equivalent): +configured as detailed above. Upon successfully configuring a build directory, +generate the outputs with the following command (or suitable build system +equivalent): .. code-block:: console @@ -137,8 +254,8 @@ defined within them, with the following exceptions: enumerations, and structure type enumerations. * `scripts/core/enqueue.yml`_ defines commands which can be enqueued on a queue object. -* `scripts/core/runtime.yml`_ defines global symbols pertaining to - initialization and tear down of the entire runtime. +* `scripts/core/loader.yml`_ defines global symbols pertaining to + initialization and tear down of the loader. * `scripts/core/registry.yml`_ contains an enumeration of all entry-points, past and present, for use in the XPTI tracing framework. It is automatically updated so shouldn't require manual editing. @@ -148,8 +265,8 @@ defined within them, with the following exceptions: https://github.com/oneapi-src/unified-runtime/blob/main/scripts/core/common.yml .. 
_scripts/core/enqueue.yml: https://github.com/oneapi-src/unified-runtime/blob/main/scripts/core/enqueue.yml -.. _scripts/core/runtime.yml: - https://github.com/oneapi-src/unified-runtime/blob/main/scripts/core/runtime.yml +.. _scripts/core/loader.yml: + https://github.com/oneapi-src/unified-runtime/blob/main/scripts/core/loader.yml .. _scripts/core/registry.yml: https://github.com/oneapi-src/unified-runtime/blob/main/scripts/core/registry.yml diff --git a/scripts/core/CUDA.rst b/scripts/core/CUDA.rst new file mode 100644 index 0000000000..9771693113 --- /dev/null +++ b/scripts/core/CUDA.rst @@ -0,0 +1,167 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +========================== +CUDA UR Reference Document +========================== + +This document gives general guidelines of how to use UR to load and build +programs, and execute kernels on a CUDA device. + +Device code +=========== + +A CUDA device image may be made of PTX and/or SASS, two different kinds of +device code for NVIDIA GPUs. + +CUDA device images can be generated by a CUDA-capable compiler toolchain. Most +CUDA compiler toolchains are capable of generating PTX, SASS and/or bundles of +PTX and SASS. + +When generating device code to be launched using Unified Runtime, it is +recommended to use a programming model with explicit kernel parameters, such as +OpenCL or CUDA. This is because kernels generated by a programming model with +implicit kernel parameters, such as SYCL, cannot guarantee any specific number +or ordering of kernel parameters. It has been observed that kernel signatures +for the same SYCL kernel may vary significantly when compiled for different +architectures. + +PTX +--- + +PTX is a high level NVIDIA ISA which can be JIT compiled at runtime by the CUDA +driver. In UR, this JIT compilation happens at ${x}ProgramBuild, where PTX is +assembled into device specific SASS which then can run on device. 
+ +PTX is forward compatible, so PTX generated for ``.target sm_52`` will be JIT +compiled without issue for devices with a greater compute capability than +``sm_52``. Whereas PTX generated for ``sm_80`` cannot be JIT compiled for an +``sm_60`` device. + +An advantage of using PTX over SASS is that one code can run on multiple +devices. However, PTX generated for an older arch may not give access to newer +hardware instructions, such as new atomic operations, or tensor core +instructions. + +JIT compilation has some overhead at ${x}ProgramBuild, especially if the program +that is being loaded contains multiple kernels. The ``ptxjitcompiler`` keeps a +JIT cache, however, so this overhead is only paid the first time that a program +is built. JIT caching may be turned off by setting the environment variable +``CUDA_CACHE_DISABLE=1``. + +SASS +---- + +SASS is a device specific binary which may be produced by ``ptxas`` or some +other tool. SASS is specific to an individual arch and is not portable across +arches. + +A SASS file may be stored as a ``.cubin`` file by NVIDIA tools. + +UR Programs +=========== + +A ${x}_program_handle_t has a one to one mapping with the CUDA driver object +`CUModule `_. + +In UR for CUDA, a ${x}_program_handle_t can be created using +${x}ProgramCreateWithBinary with: + +* A single PTX module, stored as a null terminated ``uint8_t`` buffer. +* A single SASS module, stored as an opaque ``uint8_t`` buffer. +* A mixed PTX/SASS module, where the SASS module is the assembled PTX module. + +A ${x}_program_handle_t is valid only for a single architecture. If a CUDA +compatible binary contains device code for multiple NVIDIA architectures, it is +the user's responsibility to split these separate device images so that +${x}ProgramCreateWithBinary is only called with a device binary for a single +device arch. + +If a program is large and contains many kernels, loading and/or JIT compiling +the program may have a high overhead. 
This can be mitigated by splitting a +program into multiple smaller programs (corresponding to PTX/SASS files). In +this way, an application will only pay the overhead of loading/compiling +kernels that it will likely use. + +Using PTX Modules in UR +----------------------- + +A PTX module will be loaded and JIT compiled for the necessary architecture at +${x}ProgramBuild. If the PTX module has been generated for a compute capability +greater than the compute capability of the device, then ${x}ProgramBuild will +fail with the error ``CUDA_ERROR_NO_BINARY_FOR_GPU``. + +A PTX module passed to ${x}ProgramBuild must contain only one PTX file. +Separate PTX files are to be handled separately. + +Arguments may be passed to the ``ptxjitcompiler`` via ${x}ProgramBuild. +Currently ``maxrregcount`` is the only supported argument. + +.. parsed-literal:: + + ${x}ProgramBuild(ctx, program, "maxrregcount=128"); + + +Using SASS Modules in UR +------------------------ + +A SASS module will be loaded and checked for compatibility at ${x}ProgramBuild. +If the SASS module is incompatible with the device arch then ${x}ProgramBuild +will fail with the error ``CUDA_ERROR_NO_BINARY_FOR_GPU``. + +Using Mixed PTX/SASS Bundles in UR +---------------------------------- + +Mixed PTX/SASS modules can be used to make a program with +${x}ProgramCreateWithBinary. At ${x}ProgramBuild the CUDA driver will check +whether the bundled SASS is compatible with the active device. If the SASS is +compatible then the ${x}_program_handle_t will be built from the SASS, and if +not then the PTX will be used as a fallback and JIT compiled by the CUDA +driver. If both PTX and SASS are incompatible with the active device then +${x}ProgramBuild will fail with the error ``CUDA_ERROR_NO_BINARY_FOR_GPU``. + +UR Kernels +========== + +Once ${x}ProgramCreateWithBinary and ${x}ProgramBuild have succeeded, kernels +can be fetched from programs with ${x}KernelCreate. 
${x}KernelCreate must be +called with the exact name of the kernel in the PTX/SASS module. This name will +depend on the mangling used when compiling the kernel, so it is recommended to +examine the symbols in the PTX/SASS module before trying to extract kernels in +UR. + +.. code-block:: console + + $ cuobjdump --dump-elf-symbols hello.cubin | grep mykernel + _Z13mykernelv + +At present it is not possible to query the names of the kernels in a UR program +for CUDA, so it is necessary to know the (mangled or otherwise) names of kernels +in advance or by some other means. + +UR kernels can be dispatched with ${x}EnqueueKernelLaunch. The argument +``pGlobalWorkOffset`` can only be used if the kernels have been instrumented to +take the extra global offset argument. Use of the global offset is not +recommended for non SYCL compiler toolchains. This parameter can be ignored if +the user does not wish to use the global offset. + +Other Notes +=========== + +- The environment variable ``SYCL_PI_CUDA_MAX_LOCAL_MEM_SIZE`` can be set in + order to exceed the default max dynamic local memory size. More information + can be found + `here `_. +- The size of primitive datatypes may differ in host and device code. For + instance, NVCC treats ``long double`` as 8 bytes for device and 16 bytes for + host. +- In kernel ``printf`` for NVPTX targets does not support the ``%z`` modifier. 
+ +Contributors +------------ + +* Hugh Delaney `hugh.delaney@codeplay.com `_ + diff --git a/scripts/core/EXP-BINDLESS-IMAGES.rst b/scripts/core/EXP-BINDLESS-IMAGES.rst index 071fe799fd..c794c199d9 100644 --- a/scripts/core/EXP-BINDLESS-IMAGES.rst +++ b/scripts/core/EXP-BINDLESS-IMAGES.rst @@ -68,6 +68,8 @@ Enums ${X}_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC ${X}_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR ${X}_STRUCTURE_TYPE_EXP_WIN32_HANDLE + ${X}_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES + ${X}_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES * ${x}_device_info_t * ${X}_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP @@ -127,6 +129,8 @@ Types * ${x}_exp_interop_semaphore_desc_t * ${x}_exp_file_descriptor_t * ${x}_exp_win32_handle_t +* ${x}_exp_layered_image_properties_t +* ${x}_exp_sampler_addr_modes_t Functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -176,6 +180,10 @@ Changelog +----------+-------------------------------------------------------------+ | 6.0 | Fix semaphore import function parameter name. | +----------+-------------------------------------------------------------+ +| 7.0 | Add layered image properties struct. | ++----------+-------------------------------------------------------------+ +| 8.0 | Added structure for sampler addressing modes per dimension. | ++----------+-------------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index a169117022..a6a32a66a1 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -92,13 +92,17 @@ of event handles. 
Currently only the following commands are supported: * ${x}CommandBufferAppendKernelLaunchExp -* ${x}CommandBufferAppendMemcpyUSMExp -* ${x}CommandBufferAppendMembufferCopyExp -* ${x}CommandBufferAppendMembufferCopyRectExp -* ${x}CommandBufferAppendMembufferReadExp -* ${x}CommandBufferAppendMembufferReadRectExp -* ${x}CommandBufferAppendMembufferWriteExp -* ${x}CommandBufferAppendMembufferWriteRectExp +* ${x}CommandBufferAppendUSMMemcpyExp +* ${x}CommandBufferAppendUSMFillExp +* ${x}CommandBufferAppendMemBufferCopyExp +* ${x}CommandBufferAppendMemBufferCopyRectExp +* ${x}CommandBufferAppendMemBufferReadExp +* ${x}CommandBufferAppendMemBufferReadRectExp +* ${x}CommandBufferAppendMemBufferWriteExp +* ${x}CommandBufferAppendMemBufferWriteRectExp +* ${x}CommandBufferAppendMemBufferFillExp +* ${x}CommandBufferAppendUSMPrefetchExp +* ${x}CommandBufferAppendUSMAdviseExp It is planned to eventually support any command type from the Core API which can actually be appended to the equiavalent adapter native constructs. @@ -118,7 +122,7 @@ were obtained from. 
// Append a memcpy with no sync-point dependencies ${x}_exp_command_buffer_sync_point_t syncPoint; - ${x}CommandBufferAppendMemcpyUSMExp(hCommandBuffer, pDst, pSrc, size, 0, + ${x}CommandBufferAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, 0, nullptr, &syncPoint); // Append a kernel launch with syncPoint as a dependency, ignore returned @@ -167,13 +171,17 @@ Enums * ${X}_FUNCTION_COMMAND_BUFFER_FINALIZE_EXP * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP * ${X}_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP - * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP - * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP - * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP - * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP - * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP - * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP - * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP @@ -191,13 +199,17 @@ Functions * ${x}CommandBufferReleaseExp * ${x}CommandBufferFinalizeExp * ${x}CommandBufferAppendKernelLaunchExp -* ${x}CommandBufferAppendMemcpyUSMExp -* ${x}CommandBufferAppendMembufferCopyExp -* ${x}CommandBufferAppendMembufferCopyRectExp -* ${x}CommandBufferAppendMembufferReadExp -* ${x}CommandBufferAppendMembufferReadRectExp -* ${x}CommandBufferAppendMembufferWriteExp -* 
${x}CommandBufferAppendMembufferWriteRectExp +* ${x}CommandBufferAppendUSMMemcpyExp +* ${x}CommandBufferAppendUSMFillExp +* ${x}CommandBufferAppendMemBufferCopyExp +* ${x}CommandBufferAppendMemBufferCopyRectExp +* ${x}CommandBufferAppendMemBufferReadExp +* ${x}CommandBufferAppendMemBufferReadRectExp +* ${x}CommandBufferAppendMemBufferWriteExp +* ${x}CommandBufferAppendMemBufferWriteRectExp +* ${x}CommandBufferAppendMemBufferFillExp +* ${x}CommandBufferAppendUSMPrefetchExp +* ${x}CommandBufferAppendUSMAdviseExp * ${x}CommandBufferEnqueueExp Changelog @@ -208,7 +220,12 @@ Changelog +===========+=======================================================+ | 1.0 | Initial Draft | +-----------+-------------------------------------------------------+ -| 1.1 | add function definitions for buffer read and write | +| 1.1 | Add function definitions for buffer read and write | ++-----------+-------------------------------------------------------+ +| 1.2 | Add function definitions for fill commands | ++-----------+-------------------------------------------------------+ +| 1.3 | Add function definitions for Prefetch and Advise | +| | commands | +-----------+-------------------------------------------------------+ Contributors diff --git a/scripts/core/EXP-COOPERATIVE-KERNELS.rst b/scripts/core/EXP-COOPERATIVE-KERNELS.rst new file mode 100644 index 0000000000..c6b64ef669 --- /dev/null +++ b/scripts/core/EXP-COOPERATIVE-KERNELS.rst @@ -0,0 +1,68 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +.. _experimental-cooperative-kernels: + +================================================================================ +Cooperative Kernels +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. 
+ + +Motivation +-------------------------------------------------------------------------------- +Cooperative kernels are kernels that use cross-workgroup synchronization +features. All enqueued workgroups must run concurrently for cooperative kernels +to execute without hanging. This experimental feature provides an API for +querying the maximum number of workgroups and launching cooperative kernels. + +Any device can support cooperative kernels by restricting the maximum number of +workgroups to 1. Devices that support cross-workgroup synchronization can +specify a larger maximum for a given cooperative kernel. + +The functions defined here align with those specified in Level Zero. + +API +-------------------------------------------------------------------------------- + +Macros +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${X}_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP + +Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}EnqueueCooperativeKernelLaunchExp +* ${x}KernelSuggestMaxCooperativeGroupCountExp + +Changelog +-------------------------------------------------------------------------------- ++-----------+------------------------+ +| Revision | Changes | ++===========+========================+ +| 1.0 | Initial Draft | ++-----------+------------------------+ + +Support +-------------------------------------------------------------------------------- + +Adapters which support this experimental feature *must* return the valid string +defined in ``${X}_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP`` +as one of the options from ${x}DeviceGetInfo when querying for +${X}_DEVICE_INFO_EXTENSIONS. Conversely, before using any of the +functionality defined in this experimental feature the user *must* use the +device query to determine if the adapter supports this feature. 
+ +Contributors +-------------------------------------------------------------------------------- +* Michael Aziz `michael.aziz@intel.com `_ diff --git a/scripts/core/HIP.rst b/scripts/core/HIP.rst new file mode 100644 index 0000000000..3ded0138ff --- /dev/null +++ b/scripts/core/HIP.rst @@ -0,0 +1,103 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +============================= +AMD HIP UR Reference Document +============================= + +This document gives general guidelines of how to use UR to execute kernels on +a AMD HIP device. + +Device code +=========== + +Unlike the NVPTX platform, AMDGPU does not use a device IR that can be JIT +compiled at runtime. Therefore, all device binaries must be precompiled for a +particular arch. + +The naming of AMDGPU device code files may vary across different generations +of devices. ``.hsa`` or ``.hsaco`` are common extensions as of 2023. + +HIPCC can generate device code for a particular arch using the ``--genco`` flag + +.. code-block:: console + + $ hipcc --genco hello.cu --amdgpu-target=gfx906 -o hello.hsaco + +When generating device code to be launched using Unified Runtime, it is +recommended to use a programming model with explicit kernel parameters, such as +OpenCL or HIP. This is because kernels generated by a programming model with +implicit kernel parameters, such as SYCL, cannot guarantee any specific number +or ordering of kernel parameters. It has been observed that kernel signatures +for the same SYCL kernel may vary significantly when compiled for different +architectures. + +UR Programs +=========== + +A ${x}_program_handle_t has a one to one mapping with the HIP runtime object +`hipModule_t `__ + +In UR for HIP, a ${x}_program_handle_t can be created using +${x}ProgramCreateWithBinary with: + +* A single device code module + +A ${x}_program_handle_t is valid only for a single architecture. 
If a HIP +compatible binary contains device code for multiple AMDGPU architectures, it is +the user's responsibility to split these separate device images so that +${x}ProgramCreateWithBinary is only called with a device binary for a single +device arch. + +If the AMDGPU module is incompatible with the device arch then ${x}ProgramBuild +will fail with the error ``hipErrorNoBinaryForGpu``. + +If a program is large and contains many kernels, loading the program may have a +high overhead. This can be mitigated by splitting a program into multiple +smaller programs. In this way, an application will only pay the overhead of +loading kernels that it will likely use. + +Kernels +======= + +Once ${x}ProgramCreateWithBinary and ${x}ProgramBuild have succeeded, kernels +can be fetched from programs with ${x}KernelCreate. ${x}KernelCreate must be +called with the exact name of the kernel in the AMDGPU device code module. This +name will depend on the mangling used when compiling the kernel, so it is +recommended to examine the symbols in the AMDGPU device code module before +trying to extract kernels in UR code. + +``llvm-objdump`` or ``readelf`` may not correctly view the symbols in an AMDGPU +device module. It may be necessary to call ``clang-offload-bundler`` first in +order to extract the ``ELF`` file that can be passed to ``readelf``. + +.. code-block:: console + + $ clang-offload-bundler --unbundle --input=hello.hsaco --output=hello.o \ + --targets=hipv4-amdgcn-amd-amdhsa--gfx906 --type=o + $ readelf hello.o -s | grep mykernel + _Z13mykernelv + +At present it is not possible to query the names of the kernels in a UR program +for HIP, so it is necessary to know the (mangled or otherwise) names of kernels +in advance or by some other means. + +UR kernels can be dispatched with ${x}EnqueueKernelLaunch. The argument +``pGlobalWorkOffset`` can only be used if the kernels have been instrumented to +take the extra global offset argument. 
Use of the global offset is not +recommended for non-SYCL compiler toolchains.
envvar:: UR_ADAPTERS_DEEP_BIND + + If set, the loader will use `RTLD_DEEPBIND` when opening adapter libraries. This might be useful if an adapter + requires a different version of a shared library compared to the rest of the applcation. + + .. note:: + + This environment variable is Linux-only. + .. envvar:: UR_ENABLE_LAYERS - Holds a comma-separated list of layers to enable in addition to any specified via ``urInit``. + Holds a comma-separated list of layers to enable in addition to any specified via ``urLoaderInit``. .. note:: diff --git a/scripts/core/runtime.yml b/scripts/core/adapter.yml similarity index 52% rename from scripts/core/runtime.yml rename to scripts/core/adapter.yml index c14f939cc2..a2331244e1 100644 --- a/scripts/core/runtime.yml +++ b/scripts/core/adapter.yml @@ -1,5 +1,5 @@ # -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2023 Intel Corporation # # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT @@ -9,176 +9,8 @@ # --- #-------------------------------------------------------------------------- type: header -desc: "Intel $OneApi Unified Runtime APIs for Runtime" -ordinal: "0" ---- #-------------------------------------------------------------------------- -type: enum -desc: "Supported device initialization flags" -class: $x -name: $x_device_init_flags_t -etors: - - name: GPU - desc: "initialize GPU device adapters." - - name: CPU - desc: "initialize CPU device adapters." - - name: FPGA - desc: "initialize FPGA device adapters." - - name: MCA - desc: "initialize MCA device adapters." - - name: VPU - desc: "initialize VPU device adapters." ---- #-------------------------------------------------------------------------- -type: function -desc: "Create a loader config object." 
-class: $xLoaderConfig -loader_only: True -name: Create -decl: static -params: - - type: $x_loader_config_handle_t* - name: phLoaderConfig - desc: "[out] Pointer to handle of loader config object created." ---- #-------------------------------------------------------------------------- -type: function -desc: "Get a reference to the loader config object." -class: $xLoaderConfig -loader_only: True -name: Retain -decl: static -details: - - "Get a reference to the loader config handle. Increment its reference count" - - "The application may call this function from simultaneous threads." - - "The implementation of this function should be lock-free." -params: - - type: $x_loader_config_handle_t - name: hLoaderConfig - desc: "[in] loader config handle to retain" ---- #-------------------------------------------------------------------------- -type: function -desc: "Release config handle." -class: $xLoaderConfig -loader_only: True -name: Release -decl: static -details: - - "Decrement reference count and destroy the config handle if reference count becomes zero." - - "The application may call this function from simultaneous threads." - - "The implementation of this function should be lock-free." -params: - - type: $x_loader_config_handle_t - name: hLoaderConfig - desc: "[in] config handle to release" ---- #-------------------------------------------------------------------------- -type: enum -desc: "Supported loader info" -class: $xLoaderConfig -name: $x_loader_config_info_t -typed_etors: True -etors: - - name: AVAILABLE_LAYERS - desc: "[char[]] Null-terminated, semi-colon separated list of available layers." - - name: REFERENCE_COUNT - desc: "[uint32_t] Reference count of the loader config object." ---- #-------------------------------------------------------------------------- -type: function -desc: "Retrieves various information about the loader." 
-class: $xLoaderConfig -loader_only: True -name: GetInfo -decl: static -details: - - "The application may call this function from simultaneous threads." - - "The implementation of this function should be lock-free." -params: - - type: $x_loader_config_handle_t - name: hLoaderConfig - desc: "[in] handle of the loader config object" - - type: $x_loader_config_info_t - name: propName - desc: "[in] type of the info to retrieve" - - type: "size_t" - name: propSize - desc: | - [in] the number of bytes pointed to by pPropValue. - - type: "void*" - name: pPropValue - desc: | - [out][optional][typename(propName, propSize)] array of bytes holding the info. - If propSize is not equal to or greater than the real number of bytes needed to return the info - then the $X_RESULT_ERROR_INVALID_SIZE error is returned and pPropValue is not used. - - type: "size_t*" - name: pPropSizeRet - desc: | - [out][optional] pointer to the actual size in bytes of the queried propName. -returns: - - $X_RESULT_ERROR_UNSUPPORTED_ENUMERATION: - - "If `propName` is not supported by the loader." - - $X_RESULT_ERROR_INVALID_SIZE: - - "`propSize == 0 && pPropValue != NULL`" - - "If `propSize` is less than the real number of bytes needed to return the info." - - $X_RESULT_ERROR_INVALID_NULL_POINTER: - - "`propSize != 0 && pPropValue == NULL`" - - "`pPropValue == NULL && pPropSizeRet == NULL`" - - $X_RESULT_ERROR_INVALID_DEVICE - - $X_RESULT_ERROR_OUT_OF_RESOURCES - - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY ---- #-------------------------------------------------------------------------- -type: function -desc: "Enable a layer for the specified loader config." -class: $xLoaderConfig -loader_only: True -name: EnableLayer -decl: static -params: - - type: $x_loader_config_handle_t - name: hLoaderConfig - desc: "[in] Handle to config object the layer will be enabled for." - - type: const char* - name: pLayerName - desc: "[in] Null terminated string containing the name of the layer to enable." 
-returns: - - $X_RESULT_ERROR_LAYER_NOT_PRESENT: - - "If layer specified with `pLayerName` can't be found by the loader." ---- #-------------------------------------------------------------------------- -type: function -desc: "Initialize the $OneApi adapter(s)" -class: $x -name: Init -decl: static -ordinal: "0" -details: - - "The application must call this function before calling any other function." - - "If this function is not called then all other functions will return $X_RESULT_ERROR_UNINITIALIZED." - - "Only one instance of each adapter will be initialized per process." - - "The application may call this function multiple times with different flags or environment variables enabled." - - "The application must call this function after forking new processes. Each forked process must call this function." - - "The application may call this function from simultaneous threads." - - "The implementation of this function must be thread-safe for scenarios where multiple libraries may initialize the adapter(s) simultaneously." -params: - - type: $x_device_init_flags_t - name: device_flags - desc: | - [in] device initialization flags. - must be 0 (default) or a combination of $x_device_init_flag_t. - init: "0" - - type: $x_loader_config_handle_t - name: hLoaderConfig - desc: "[in][optional] Handle of loader config handle." 
-returns: - - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY ---- #-------------------------------------------------------------------------- -type: function -desc: "Tear down the $OneApi instance and release all its resources" -class: $x -name: TearDown -decl: static +desc: "Intel $OneApi Unified Runtime APIs for Adapter" ordinal: "1" -params: - - type: void* - name: pParams - desc: "[in] pointer to tear down parameters" -returns: - - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY --- #-------------------------------------------------------------------------- type: function desc: "Retrieves all available adapters" @@ -218,7 +50,8 @@ name: AdapterRelease decl: static ordinal: "3" details: - - "When the reference count of the adapter reaches zero, the adapter may perform adapter-specififc resource teardown" + - "When the reference count of the adapter reaches zero, the adapter may perform adapter-specififc resource teardown. Resources + must be left in a state where it safe for the adapter to be subsequently reinitialized with $xAdapterGet" params: - type: "$x_adapter_handle_t" name: hAdapter diff --git a/scripts/core/device.yml b/scripts/core/device.yml index 27f2100feb..3999fa70f2 100644 --- a/scripts/core/device.yml +++ b/scripts/core/device.yml @@ -131,7 +131,7 @@ params: name: NumEntries desc: | [in] the number of devices to be added to phDevices. - If phDevices in not NULL then NumEntries should be greater than zero, otherwise $X_RESULT_ERROR_INVALID_VALUE, + If phDevices is not NULL, then NumEntries should be greater than zero. Otherwise $X_RESULT_ERROR_INVALID_SIZE will be returned. - type: "$x_device_handle_t*" name: phDevices @@ -144,6 +144,10 @@ params: [out][optional] pointer to the number of devices. pNumDevices will be updated with the total number of devices available. 
returns: + - $X_RESULT_ERROR_INVALID_SIZE: + - "`NumEntries == 0 && phDevices != NULL`" + - $X_RESULT_ERROR_INVALID_NULL_POINTER: + - "`NumEntries > 0 && phDevices == NULL`" - $X_RESULT_ERROR_INVALID_VALUE --- #-------------------------------------------------------------------------- type: enum diff --git a/scripts/core/enqueue.yml b/scripts/core/enqueue.yml index aef1d8023b..7af03074c9 100644 --- a/scripts/core/enqueue.yml +++ b/scripts/core/enqueue.yml @@ -158,7 +158,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object" + desc: "[in][bounds(offset, size)] handle of the buffer object" - type: bool name: blockingRead desc: "[in] indicates blocking (true), non-blocking (false)" @@ -211,7 +211,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object" + desc: "[in][bounds(offset, size)] handle of the buffer object" - type: bool name: blockingWrite desc: "[in] indicates blocking (true), non-blocking (false)" @@ -265,7 +265,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object" + desc: "[in][bounds(bufferOrigin, region)] handle of the buffer object" - type: bool name: blockingRead desc: "[in] indicates blocking (true), non-blocking (false)" @@ -341,7 +341,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object" + desc: "[in][bounds(bufferOrigin, region)] handle of the buffer object" - type: bool name: blockingWrite desc: "[in] indicates blocking (true), non-blocking (false)" @@ -414,10 +414,10 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBufferSrc - desc: "[in] handle of the src buffer object" + desc: "[in][bounds(srcOffset, size)] handle of the src buffer object" - type: $x_mem_handle_t name: hBufferDst - desc: "[in] handle 
of the dest buffer object" + desc: "[in][bounds(dstOffset, size)] handle of the dest buffer object" - type: size_t name: srcOffset desc: "[in] offset into hBufferSrc to begin copying from" @@ -466,10 +466,10 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBufferSrc - desc: "[in] handle of the source buffer object" + desc: "[in][bounds(srcOrigin, region)] handle of the source buffer object" - type: $x_mem_handle_t name: hBufferDst - desc: "[in] handle of the dest buffer object" + desc: "[in][bounds(dstOrigin, region)] handle of the dest buffer object" - type: $x_rect_offset_t name: srcOrigin desc: "[in] 3D offset in the source buffer" @@ -537,7 +537,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object" + desc: "[in][bounds(offset, size)] handle of the buffer object" - type: "const void*" name: pPattern desc: "[in] pointer to the fill pattern" @@ -571,6 +571,11 @@ returns: - "If event objects in phEventWaitList are not valid events." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_SIZE: + - "`patternSize == 0 || size == 0`" + - "`patternSize > size`" + - "`(patternSize & (patternSize - 1)) != 0`" + - "`size % patternSize != 0`" + - "`offset % patternSize != 0`" - "If `offset + size` results in an out-of-bounds access." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -590,7 +595,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hImage - desc: "[in] handle of the image object" + desc: "[in][bounds(origin, region)] handle of the image object" - type: bool name: blockingRead desc: "[in] indicates blocking (true), non-blocking (false)" @@ -629,6 +634,8 @@ returns: - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." 
- $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_SIZE: + - "`region.width == 0 || region.height == 0 || region.depth == 0`" - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -647,7 +654,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hImage - desc: "[in] handle of the image object" + desc: "[in][bounds(origin, region)] handle of the image object" - type: bool name: blockingWrite desc: "[in] indicates blocking (true), non-blocking (false)" @@ -686,6 +693,8 @@ returns: - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_SIZE: + - "`region.width == 0 || region.height == 0 || region.depth == 0`" - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -702,10 +711,10 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hImageSrc - desc: "[in] handle of the src image object" + desc: "[in][bounds(srcOrigin, region)] handle of the src image object" - type: $x_mem_handle_t name: hImageDst - desc: "[in] handle of the dest image object" + desc: "[in][bounds(dstOrigin, region)] handle of the dest image object" - type: $x_rect_offset_t name: srcOrigin desc: "[in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D image" @@ -735,6 +744,8 @@ returns: - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." 
- $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_SIZE: + - "`region.width == 0 || region.height == 0 || region.depth == 0`" - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -831,7 +842,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object" + desc: "[in][bounds(offset, size)] handle of the buffer object" - type: bool name: blockingMap desc: "[in] indicates blocking (true), non-blocking (false)" @@ -985,8 +996,8 @@ params: name: hQueue desc: "[in] handle of the queue object" - type: void* - name: ptr - desc: "[in] pointer to USM memory object" + name: pMem + desc: "[in][bounds(0, size)] pointer to USM memory object" - type: size_t name: patternSize desc: "[in] the size in bytes of the pattern. Must be a power of 2 and less than or equal to width." @@ -1039,10 +1050,10 @@ params: desc: "[in] blocking or non-blocking copy" - type: void* name: pDst - desc: "[in] pointer to the destination USM memory object" + desc: "[in][bounds(0, size)] pointer to the destination USM memory object" - type: "const void*" name: pSrc - desc: "[in] pointer to the source USM memory object" + desc: "[in][bounds(0, size)] pointer to the source USM memory object" - type: size_t name: size desc: "[in] size in bytes to be copied" @@ -1077,13 +1088,16 @@ desc: "Enqueue a command to prefetch USM memory" class: $xEnqueue name: USMPrefetch ordinal: "0" +details: + - "Prefetching may not be supported for all devices or allocation types. If memory prefetching + is not supported, the prefetch hint will be ignored." 
params: - type: $x_queue_handle_t name: hQueue desc: "[in] handle of the queue object" - type: "const void*" name: pMem - desc: "[in] pointer to the USM memory object" + desc: "[in][bounds(0, size)] pointer to the USM memory object" - type: size_t name: size desc: "[in] size in bytes to be fetched" @@ -1117,17 +1131,20 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Enqueue a command to set USM memory advice" +desc: "Enqueue a command to set USM memory advice" class: $xEnqueue name: USMAdvise ordinal: "0" +details: + - "Not all memory advice hints may be supported for all devices or allocation types. + If a memory advice hint is not supported, it will be ignored." params: - type: $x_queue_handle_t name: hQueue desc: "[in] handle of the queue object" - type: "const void*" name: pMem - desc: "[in] pointer to the USM memory object" + desc: "[in][bounds(0, size)] pointer to the USM memory object" - type: size_t name: size desc: "[in] size in bytes to be advised" @@ -1159,7 +1176,7 @@ params: desc: "[in] handle of the queue to submit to." - type: void* name: pMem - desc: "[in] pointer to memory to be filled." + desc: "[in][bounds(0, pitch * height)] pointer to memory to be filled." - type: size_t name: pitch desc: "[in] the total width of the destination memory including padding." @@ -1221,13 +1238,13 @@ params: desc: "[in] indicates if this operation should block the host." - type: void* name: pDst - desc: "[in] pointer to memory where data will be copied." + desc: "[in][bounds(0, dstPitch * height)] pointer to memory where data will be copied." - type: size_t name: dstPitch desc: "[in] the total width of the source memory including padding." - type: "const void*" name: pSrc - desc: "[in] pointer to memory to be copied." + desc: "[in][bounds(0, srcPitch * height)] pointer to memory to be copied." 
- type: size_t name: srcPitch desc: "[in] the total width of the source memory including padding." @@ -1441,7 +1458,7 @@ params: - type: $x_event_handle_t* name: phEvent desc: | - [out] returns an event object that identifies this write command + [out][optional] returns an event object that identifies this write command and can be used to query or queue a wait for this command to complete. returns: - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: diff --git a/scripts/core/event.yml b/scripts/core/event.yml index c20a8e04c9..ba0ae968c8 100644 --- a/scripts/core/event.yml +++ b/scripts/core/event.yml @@ -185,6 +185,8 @@ params: name: pPropSizeRet desc: "[out][optional] pointer to the actual size in bytes returned in propValue" returns: + - $X_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE: + - "If `hEvent`s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`." - $X_RESULT_ERROR_INVALID_VALUE: - "`pPropValue && propSize == 0`" - $X_RESULT_ERROR_INVALID_EVENT @@ -317,13 +319,13 @@ desc: "Event states for all events." class: $xEvent name: $x_execution_info_t etors: - - name: EXECUTION_INFO_COMPLETE + - name: COMPLETE desc: "Indicates that the event has completed." - - name: EXECUTION_INFO_RUNNING + - name: RUNNING desc: "Indicates that the device has started processing this event." - - name: EXECUTION_INFO_SUBMITTED + - name: SUBMITTED desc: "Indicates that the event has been submitted by the host to the device." - - name: EXECUTION_INFO_QUEUED + - name: QUEUED desc: "Indicates that the event has been queued, this is the initial state of events." --- #-------------------------------------------------------------------------- type: fptr_typedef @@ -349,6 +351,7 @@ decl: static ordinal: "0" details: - "The registered callback function will be called when the execution status of command associated with event changes to an execution status equal to or past the status specified by command_exec_status." 
+ - "`execStatus` must not be `UR_EXECUTION_INFO_QUEUED` as this is the initial state of all events." - "The application may call this function from simultaneous threads for the same context." - "The implementation of this function should be thread-safe." params: @@ -364,3 +367,6 @@ params: - type: void* name: pUserData desc: "[in][out][optional] pointer to data to be passed to callback." +returns: + - $X_RESULT_ERROR_UNSUPPORTED_ENUMERATION: + - "`execStatus == UR_EXECUTION_INFO_QUEUED`" diff --git a/scripts/core/exp-bindless-images.yml b/scripts/core/exp-bindless-images.yml index 846e97ac61..b5f87a6633 100644 --- a/scripts/core/exp-bindless-images.yml +++ b/scripts/core/exp-bindless-images.yml @@ -107,6 +107,12 @@ etors: - name: EXP_WIN32_HANDLE desc: $x_exp_win32_handle_t value: "0x2004" + - name: EXP_LAYERED_IMAGE_PROPERTIES + desc: $x_exp_layered_image_properties_t + value: "0x2005" + - name: EXP_SAMPLER_ADDR_MODES + desc: $x_exp_sampler_addr_modes_t + value: "0x2006" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -173,6 +179,19 @@ members: desc: "[in] mipmap filter mode used for filtering between mipmap levels" --- #-------------------------------------------------------------------------- type: struct +desc: "Describes unique sampler addressing mode per dimension" +details: + - Specify these properties in $xSamplerCreate via $x_sampler_desc_t as part + of a `pNext` chain. 
+class: $xBindlessImages +name: $x_exp_sampler_addr_modes_t +base: $x_base_properties_t +members: + - type: $x_sampler_addressing_mode_t[3] + name: addrModes + desc: "[in] Specify the address mode of the sampler per dimension" +--- #-------------------------------------------------------------------------- +type: struct desc: "Describes an interop memory resource descriptor" class: $xBindlessImages name: $x_exp_interop_mem_desc_t @@ -186,6 +205,20 @@ name: $x_exp_interop_semaphore_desc_t base: $x_base_desc_t members: [] --- #-------------------------------------------------------------------------- +type: struct +desc: "Describes layered image properties" +details: + - Specify these properties in $xBindlessImagesUnsampledImageCreateExp or + $xBindlessImagesSampledImageCreateExp via $x_image_desc_t as part of a + `pNext` chain. +class: $xBindlessImages +name: $x_exp_layered_image_properties_t +base: $x_base_properties_t +members: + - type: uint32_t + name: numLayers + desc: "[in] number of layers the image should have" +--- #-------------------------------------------------------------------------- type: function desc: "USM allocate pitched memory" class: $xUSM diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index e8c5417831..7d1b686aab 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -180,7 +180,7 @@ returns: type: function desc: "Append a USM memcpy command to a command-buffer object" class: $xCommandBuffer -name: AppendMemcpyUSMExp +name: AppendUSMMemcpyExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -217,9 +217,54 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function +desc: "Append a USM fill command to a command-buffer object" +class: $xCommandBuffer +name: AppendUSMFillExp +params: + - type: $x_exp_command_buffer_handle_t + name: hCommandBuffer + desc: "[in] handle 
of the command-buffer object." + - type: "void*" + name: pMemory + desc: "[in] pointer to USM allocated memory to fill." + - type: "const void*" + name: pPattern + desc: "[in] pointer to the fill pattern." + - type: "size_t" + name: patternSize + desc: "[in] size in bytes of the pattern." + - type: "size_t" + name: size + desc: "[in] fill size in bytes, must be a multiple of patternSize." + - type: uint32_t + name: numSyncPointsInWaitList + desc: "[in] The number of sync points in the provided dependency list." + - type: "const $x_exp_command_buffer_sync_point_t*" + name: pSyncPointWaitList + desc: "[in][optional] A list of sync points that this command depends on." + - type: "$x_exp_command_buffer_sync_point_t*" + name: pSyncPoint + desc: "[out][optional] sync point associated with this command." +returns: + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP + - $X_RESULT_ERROR_INVALID_SIZE: + - "`patternSize == 0 || size == 0`" + - "`patternSize > size`" + - "`(patternSize & (patternSize - 1)) != 0`" + - "`size % patternSize != 0`" + - "If `size` is higher than the allocation size of `pMemory`" + - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: + - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" + - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function desc: "Append a memory copy command to a command-buffer object" class: $xCommandBuffer -name: AppendMembufferCopyExp +name: AppendMemBufferCopyExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -261,7 +306,7 @@ returns: type: function desc: "Append a memory write command to a command-buffer object" class: $xCommandBuffer -name: AppendMembufferWriteExp +name: AppendMemBufferWriteExp params: -
type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -300,7 +345,7 @@ returns: type: function desc: "Append a memory read command to a command-buffer object" class: $xCommandBuffer -name: AppendMembufferReadExp +name: AppendMemBufferReadExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -339,7 +384,7 @@ returns: type: function desc: "Append a rectangular memory copy command to a command-buffer object" class: $xCommandBuffer -name: AppendMembufferCopyRectExp +name: AppendMemBufferCopyRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -393,7 +438,7 @@ returns: type: function desc: "Append a rectangular memory write command to a command-buffer object" class: $xCommandBuffer -name: AppendMembufferWriteRectExp +name: AppendMemBufferWriteRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -447,7 +492,7 @@ returns: type: function desc: "Append a rectangular memory read command to a command-buffer object" class: $xCommandBuffer -name: AppendMembufferReadRectExp +name: AppendMemBufferReadRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -499,6 +544,134 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function +desc: "Append a memory fill command to a command-buffer object" +class: $xCommandBuffer +name: AppendMemBufferFillExp +params: + - type: $x_exp_command_buffer_handle_t + name: hCommandBuffer + desc: "[in] handle of the command-buffer object." + - type: $x_mem_handle_t + name: hBuffer + desc: "[in] handle of the buffer object." + - type: "const void*" + name: pPattern + desc: "[in] pointer to the fill pattern." + - type: "size_t" + name: patternSize + desc: "[in] size in bytes of the pattern." + - type: "size_t" + name: offset + desc: "[in] offset into the buffer." + - type: "size_t" + name: size + desc: "[in] fill size in bytes, must be a multiple of patternSize." 
+ - type: uint32_t + name: numSyncPointsInWaitList + desc: "[in] The number of sync points in the provided dependency list." + - type: "const $x_exp_command_buffer_sync_point_t*" + name: pSyncPointWaitList + desc: "[in][optional] A list of sync points that this command depends on." + - type: $x_exp_command_buffer_sync_point_t* + name: pSyncPoint + desc: "[out][optional] sync point associated with this command." +returns: + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: + - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" + - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" + - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_SIZE: + - "If `offset + size` results in an out-of-bounds access." + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Append a USM Prefetch command to a command-buffer object" +class: $xCommandBuffer +name: AppendUSMPrefetchExp +details: + - "Prefetching may not be supported for all devices or allocation types. If memory prefetching + is not supported, the prefetch hint will be ignored." +params: + - type: $x_exp_command_buffer_handle_t + name: hCommandBuffer + desc: "[in] handle of the command-buffer object." + - type: "const void*" + name: pMemory + desc: "[in] pointer to USM allocated memory to prefetch." + - type: "size_t" + name: size + desc: "[in] size in bytes to be fetched." + - type: $x_usm_migration_flags_t + name: flags + desc: "[in] USM prefetch flags" + - type: uint32_t + name: numSyncPointsInWaitList + desc: "[in] The number of sync points in the provided dependency list." + - type: "const $x_exp_command_buffer_sync_point_t*" + name: pSyncPointWaitList + desc: "[in][optional] A list of sync points that this command depends on." 
+ - type: "$x_exp_command_buffer_sync_point_t*" + name: pSyncPoint + desc: "[out][optional] sync point associated with this command." +returns: + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: + - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" + - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" + - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_SIZE: + - "`size == 0`" + - "If `size` is higher than the allocation size of `pMemory`" + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Append a USM Advise command to a command-buffer object" +class: $xCommandBuffer +name: AppendUSMAdviseExp +details: + - "Not all memory advice hints may be supported for all devices or allocation types. + If a memory advice hint is not supported, it will be ignored." +params: + - type: $x_exp_command_buffer_handle_t + name: hCommandBuffer + desc: "[in] handle of the command-buffer object." + - type: "const void*" + name: pMemory + desc: "[in] pointer to the USM memory object." + - type: "size_t" + name: size + desc: "[in] size in bytes to be advised." + - type: $x_usm_advice_flags_t + name: advice + desc: "[in] USM memory advice" + - type: uint32_t + name: numSyncPointsInWaitList + desc: "[in] The number of sync points in the provided dependency list." + - type: "const $x_exp_command_buffer_sync_point_t*" + name: pSyncPointWaitList + desc: "[in][optional] A list of sync points that this command depends on." + - type: "$x_exp_command_buffer_sync_point_t*" + name: pSyncPoint + desc: "[out][optional] sync point associated with this command." 
+returns: + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: + - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" + - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" + - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_SIZE: + - "`size == 0`" + - "If `size` is higher than the allocation size of `pMemory`" + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function desc: "Submit a command-buffer for execution on a queue." class: $xCommandBuffer name: EnqueueExp diff --git a/scripts/core/exp-cooperative-kernels.yml b/scripts/core/exp-cooperative-kernels.yml new file mode 100644 index 0000000000..fb2c6b3a4a --- /dev/null +++ b/scripts/core/exp-cooperative-kernels.yml @@ -0,0 +1,85 @@ +# +# Copyright (C) 2023 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental APIs for Cooperative Kernels" +ordinal: "99" +--- #-------------------------------------------------------------------------- +type: macro +desc: | + The extension string which defines support for cooperative-kernels + which is returned when querying device extensions. 
+name: $X_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP +value: "\"$x_exp_cooperative_kernels\"" +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue a command to execute a cooperative kernel" +class: $xEnqueue +name: CooperativeKernelLaunchExp +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: $x_kernel_handle_t + name: hKernel + desc: "[in] handle of the kernel object" + - type: uint32_t + name: workDim + desc: "[in] number of dimensions, from 1 to 3, to specify the global and work-group work-items" + - type: "const size_t*" + name: pGlobalWorkOffset + desc: "[in] pointer to an array of workDim unsigned values that specify the offset used to calculate the global ID of a work-item" + - type: "const size_t*" + name: pGlobalWorkSize + desc: "[in] pointer to an array of workDim unsigned values that specify the number of global work-items in workDim that will execute the kernel function" + - type: "const size_t*" + name: pLocalWorkSize + desc: | + [in][optional] pointer to an array of workDim unsigned values that specify the number of local work-items forming a work-group that will execute the kernel function. + If nullptr, the runtime implementation will choose the work-group size. + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. + If nullptr, the numEventsInWaitList must be 0, indicating that there are no wait events. + - type: $x_event_handle_t* + name: phEvent + desc: | + [out][optional] return an event object that identifies this particular kernel execution instance.
+returns: + - $X_RESULT_ERROR_INVALID_QUEUE + - $X_RESULT_ERROR_INVALID_KERNEL + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_INVALID_WORK_DIMENSION + - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE + - $X_RESULT_ERROR_INVALID_VALUE + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Query the maximum number of work groups for a cooperative kernel" +class: $xKernel +name: SuggestMaxCooperativeGroupCountExp +params: + - type: $x_kernel_handle_t + name: hKernel + desc: "[in] handle of the kernel object" + - type: "uint32_t*" + name: "pGroupCountRet" + desc: "[out] pointer to maximum number of groups" +returns: + - $X_RESULT_ERROR_INVALID_KERNEL diff --git a/scripts/core/loader.yml b/scripts/core/loader.yml new file mode 100644 index 0000000000..b5ad1eadec --- /dev/null +++ b/scripts/core/loader.yml @@ -0,0 +1,227 @@ +# +# Copyright (C) 2022-2023 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime APIs for Loader" +ordinal: "0" +--- #-------------------------------------------------------------------------- +type: enum +desc: "Supported device initialization flags" +class: $x +name: $x_device_init_flags_t +etors: + - name: GPU + desc: "initialize GPU device adapters." + - name: CPU + desc: "initialize CPU device adapters." + - name: FPGA + desc: "initialize FPGA device adapters." 
+ - name: MCA + desc: "initialize MCA device adapters." + - name: VPU + desc: "initialize VPU device adapters." +--- #-------------------------------------------------------------------------- +type: function +desc: "Create a loader config object." +class: $xLoaderConfig +loader_only: True +name: Create +decl: static +params: + - type: $x_loader_config_handle_t* + name: phLoaderConfig + desc: "[out] Pointer to handle of loader config object created." +--- #-------------------------------------------------------------------------- +type: function +desc: "Get a reference to the loader config object." +class: $xLoaderConfig +loader_only: True +name: Retain +decl: static +details: + - "Get a reference to the loader config handle. Increment its reference count" + - "The application may call this function from simultaneous threads." + - "The implementation of this function should be lock-free." +params: + - type: $x_loader_config_handle_t + name: hLoaderConfig + desc: "[in] loader config handle to retain" +--- #-------------------------------------------------------------------------- +type: function +desc: "Release config handle." +class: $xLoaderConfig +loader_only: True +name: Release +decl: static +details: + - "Decrement reference count and destroy the config handle if reference count becomes zero." + - "The application may call this function from simultaneous threads." + - "The implementation of this function should be lock-free." +params: + - type: $x_loader_config_handle_t + name: hLoaderConfig + desc: "[in] config handle to release" +--- #-------------------------------------------------------------------------- +type: enum +desc: "Supported loader info" +class: $xLoaderConfig +name: $x_loader_config_info_t +typed_etors: True +etors: + - name: AVAILABLE_LAYERS + desc: "[char[]] Null-terminated, semi-colon separated list of available layers." + - name: REFERENCE_COUNT + desc: "[uint32_t] Reference count of the loader config object." 
+--- #-------------------------------------------------------------------------- +type: function +desc: "Retrieves various information about the loader." +class: $xLoaderConfig +loader_only: True +name: GetInfo +decl: static +details: + - "The application may call this function from simultaneous threads." + - "The implementation of this function should be lock-free." +params: + - type: $x_loader_config_handle_t + name: hLoaderConfig + desc: "[in] handle of the loader config object" + - type: $x_loader_config_info_t + name: propName + desc: "[in] type of the info to retrieve" + - type: "size_t" + name: propSize + desc: | + [in] the number of bytes pointed to by pPropValue. + - type: "void*" + name: pPropValue + desc: | + [out][optional][typename(propName, propSize)] array of bytes holding the info. + If propSize is not equal to or greater than the real number of bytes needed to return the info + then the $X_RESULT_ERROR_INVALID_SIZE error is returned and pPropValue is not used. + - type: "size_t*" + name: pPropSizeRet + desc: | + [out][optional] pointer to the actual size in bytes of the queried propName. +returns: + - $X_RESULT_ERROR_UNSUPPORTED_ENUMERATION: + - "If `propName` is not supported by the loader." + - $X_RESULT_ERROR_INVALID_SIZE: + - "`propSize == 0 && pPropValue != NULL`" + - "If `propSize` is less than the real number of bytes needed to return the info." + - $X_RESULT_ERROR_INVALID_NULL_POINTER: + - "`propSize != 0 && pPropValue == NULL`" + - "`pPropValue == NULL && pPropSizeRet == NULL`" + - $X_RESULT_ERROR_INVALID_DEVICE + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY +--- #-------------------------------------------------------------------------- +type: function +desc: "Enable a layer for the specified loader config." 
+class: $xLoaderConfig +loader_only: True +name: EnableLayer +decl: static +params: + - type: $x_loader_config_handle_t + name: hLoaderConfig + desc: "[in] Handle to config object the layer will be enabled for." + - type: const char* + name: pLayerName + desc: "[in] Null terminated string containing the name of the layer to enable." +returns: + - $X_RESULT_ERROR_LAYER_NOT_PRESENT: + - "If layer specified with `pLayerName` can't be found by the loader." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Code location data" +class: $xLoaderConfig +name: $x_code_location_t +members: + - type: const char* + name: functionName + desc: "[in][out] Function name." + - type: const char* + name: sourceFile + desc: "[in][out] Source code file." + - type: uint32_t + name: lineNumber + desc: "[in][out] Source code line number." + - type: uint32_t + name: columnNumber + desc: "[in][out] Source code column number." +--- #-------------------------------------------------------------------------- +type: fptr_typedef +desc: "Code location callback with user data." +name: $x_code_location_callback_t +return: $x_code_location_t +params: + - type: void* + name: pUserData + desc: "[in][out] pointer to data to be passed to callback" +--- #-------------------------------------------------------------------------- +type: function +desc: "Set a function callback for use by the loader to retrieve code location information." +details: + - "The code location callback is optional and provides additional information to the tracing layer about the entry point of the current execution flow." + - "This functionality can be used to match traced unified runtime function calls with higher-level user calls." +class: $xLoaderConfig +loader_only: True +name: SetCodeLocationCallback +decl: static +params: + - type: $x_loader_config_handle_t + name: hLoaderConfig + desc: "[in] Handle to config object the code location callback will be set for."
+ - type: $x_code_location_callback_t + name: pfnCodeloc + desc: "[in] Function pointer to code location callback." + - type: void* + name: pUserData + desc: "[in][out][optional] pointer to data to be passed to callback." +--- #-------------------------------------------------------------------------- +type: function +desc: "Initialize the $OneApi loader" +class: $xLoader +loader_only: True +name: Init +decl: static +ordinal: "0" +details: + - "The application must call this function before calling any other function." + - "If this function is not called then all other functions will return $X_RESULT_ERROR_UNINITIALIZED." + - "Only one instance of the loader will be initialized per process." + - "The application may call this function multiple times with different flags or environment variables enabled." + - "The application must call this function after forking new processes. Each forked process must call this function." + - "The application may call this function from simultaneous threads." + - "The implementation of this function must be thread-safe for scenarios where multiple libraries may initialize the loader simultaneously." +params: + - type: $x_device_init_flags_t + name: device_flags + desc: | + [in] device initialization flags. + must be 0 (default) or a combination of $x_device_init_flag_t. + init: "0" + - type: $x_loader_config_handle_t + name: hLoaderConfig + desc: "[in][optional] Handle of the loader config object."
+returns: + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY +--- #-------------------------------------------------------------------------- +type: function +desc: "Tear down the $OneApi loader and release all its resources" +class: $xLoader +loader_only: True +name: TearDown +decl: static +ordinal: "1" +params: [] +returns: + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY diff --git a/scripts/core/platform.yml b/scripts/core/platform.yml index f7020b4138..510a7e16b3 100644 --- a/scripts/core/platform.yml +++ b/scripts/core/platform.yml @@ -46,7 +46,8 @@ params: desc: | [out][optional] returns the total number of platforms available. returns: - - $X_RESULT_ERROR_INVALID_SIZE + - $X_RESULT_ERROR_INVALID_SIZE: + - "`NumEntries == 0 && phPlatforms != NULL`" --- #-------------------------------------------------------------------------- type: enum desc: "Supported platform info" @@ -133,6 +134,12 @@ etors: - name: "0_7" value: "$X_MAKE_VERSION( 0, 7 )" desc: "version 0.7" + - name: "0_8" + value: "$X_MAKE_VERSION( 0, 8 )" + desc: "version 0.8" + - name: "0_9" + value: "$X_MAKE_VERSION( 0, 9 )" + desc: "version 0.9" --- #-------------------------------------------------------------------------- type: function desc: "Returns the API version supported by the specified platform" diff --git a/scripts/core/queue.yml b/scripts/core/queue.yml index 88fe153165..15934c0e2f 100644 --- a/scripts/core/queue.yml +++ b/scripts/core/queue.yml @@ -161,8 +161,9 @@ params: returns: - $X_RESULT_ERROR_INVALID_CONTEXT - $X_RESULT_ERROR_INVALID_DEVICE - - $X_RESULT_ERROR_INVALID_VALUE - - $X_RESULT_ERROR_INVALID_QUEUE_PROPERTIES + - $X_RESULT_ERROR_INVALID_QUEUE_PROPERTIES: + - "`pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_HIGH && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_LOW`" + - "`pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_BATCHED && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE`" - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - 
$X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 2d6ce08500..deb5ee9604 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -292,12 +292,6 @@ etors: - name: QUEUE_FLUSH desc: Enumerator for $xQueueFlush value: '98' -- name: INIT - desc: Enumerator for $xInit - value: '99' -- name: TEAR_DOWN - desc: Enumerator for $xTearDown - value: '100' - name: SAMPLER_CREATE desc: Enumerator for $xSamplerCreate value: '101' @@ -373,15 +367,6 @@ etors: - name: COMMAND_BUFFER_ENQUEUE_EXP desc: Enumerator for $xCommandBufferEnqueueExp value: '128' -- name: COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP - desc: Enumerator for $xCommandBufferAppendMemcpyUSMExp - value: '129' -- name: COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP - desc: Enumerator for $xCommandBufferAppendMembufferCopyExp - value: '130' -- name: COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP - desc: Enumerator for $xCommandBufferAppendMembufferCopyRectExp - value: '131' - name: USM_PITCHED_ALLOC_EXP desc: Enumerator for $xUSMPitchedAllocExp value: '132' @@ -487,18 +472,6 @@ etors: - name: USM_P2P_PEER_ACCESS_GET_INFO_EXP desc: Enumerator for $xUsmP2PPeerAccessGetInfoExp value: '167' -- name: COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP - desc: Enumerator for $xCommandBufferAppendMembufferWriteExp - value: '168' -- name: COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP - desc: Enumerator for $xCommandBufferAppendMembufferReadExp - value: '169' -- name: COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP - desc: Enumerator for $xCommandBufferAppendMembufferWriteRectExp - value: '170' -- name: COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP - desc: Enumerator for $xCommandBufferAppendMembufferReadRectExp - value: '171' - name: LOADER_CONFIG_CREATE desc: Enumerator for $xLoaderConfigCreate value: '172' @@ -538,6 +511,54 @@ etors: - name: PROGRAM_LINK_EXP desc: Enumerator for $xProgramLinkExp value: 
'199' +- name: LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK + desc: Enumerator for $xLoaderConfigSetCodeLocationCallback + value: '200' +- name: LOADER_INIT + desc: Enumerator for $xLoaderInit + value: '201' +- name: LOADER_TEAR_DOWN + desc: Enumerator for $xLoaderTearDown + value: '202' +- name: COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP + desc: Enumerator for $xCommandBufferAppendUSMMemcpyExp + value: '203' +- name: COMMAND_BUFFER_APPEND_USM_FILL_EXP + desc: Enumerator for $xCommandBufferAppendUSMFillExp + value: '204' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferCopyExp + value: '205' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferWriteExp + value: '206' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferReadExp + value: '207' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferCopyRectExp + value: '208' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferWriteRectExp + value: '209' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferReadRectExp + value: '210' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferFillExp + value: '211' +- name: COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP + desc: Enumerator for $xCommandBufferAppendUSMPrefetchExp + value: '212' +- name: COMMAND_BUFFER_APPEND_USM_ADVISE_EXP + desc: Enumerator for $xCommandBufferAppendUSMAdviseExp + value: '213' +- name: ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP + desc: Enumerator for $xEnqueueCooperativeKernelLaunchExp + value: '214' +- name: KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP + desc: Enumerator for $xKernelSuggestMaxCooperativeGroupCountExp + value: '215' --- type: enum desc: Defines structure types diff --git 
a/scripts/ctest_parser.py b/scripts/ctest_parser.py new file mode 100644 index 0000000000..1f9c4f6cfe --- /dev/null +++ b/scripts/ctest_parser.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python +""" + Copyright (C) 2022 Intel Corporation + + Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + See LICENSE.TXT + SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +""" + +from subprocess import Popen, DEVNULL, PIPE +import argparse +import os +import json + +TMP_RESULTS_FILE = "tmp-results-file.json" + +def get_cts_test_suite_names(working_directory): + process = Popen(["ctest", "--show-only=json-v1"], cwd=working_directory, + stdout=PIPE, env=os.environ.copy()) + out,_ = process.communicate() + testsuites = json.loads(out) + return [ + test['name'][:test['name'].rfind('-')] for test in testsuites['tests'] + ] + +def percent(amount, total): + return round((amount / total) * 100, 2) + +def summarize_results(results): + total = results['Total'] + total_passed = len(results['Passed']) + total_skipped = len(results['Skipped']) + total_failed = len(results['Failed']) + total_crashed = total - (total_passed + total_skipped + total_failed) + + pass_rate_incl_skipped = percent(total_passed, total) + pass_rate_excl_skipped = percent(total_passed, total - total_skipped) + + skipped_rate = percent(total_skipped, total) + failed_rate = percent(total_failed, total) + crash_rate = percent(total_crashed, total) + + ljust_param = len(str(total)) + + print( +f"""[CTest Parser] Results: + Total [{str(total).ljust(ljust_param)}] + Passed [{str(total_passed).ljust(ljust_param)}] ({pass_rate_incl_skipped}%) - ({pass_rate_excl_skipped}% with skipped tests excluded) + Skipped [{str(total_skipped).ljust(ljust_param)}] ({skipped_rate}%) + Failed [{str(total_failed).ljust(ljust_param)}] ({failed_rate}%) + Crashed [{str(total_crashed).ljust(ljust_param)}] ({crash_rate}%) +""" + ) + +def parse_results(results): + parsed_results = {"Passed": {}, "Skipped":{}, 
"Failed": {}, 'Crashed': {}, 'Total':0, 'Success':True} + for _, result in results.items(): + if result['actual'] is None: + parsed_results['Success'] = False + parsed_results['Total'] += result['expected']['tests'] + continue + + parsed_results['Total'] += result['actual']['tests'] + for testsuite in result['actual'].get('testsuites'): + for test in testsuite.get('testsuite'): + test_name = f"{testsuite['name']}.{test['name']}" + test_time = test['time'] + if 'failures' in test: + parsed_results['Failed'][test_name] = {'time': test_time} + elif test['result'] == 'SKIPPED': + parsed_results['Skipped'][test_name] = {'time': test_time} + else: + parsed_results['Passed'][test_name] = {'time': test_time} + return parsed_results + +def run(args): + results = {} + + tmp_results_file = f"{args.ctest_path}/{TMP_RESULTS_FILE}" + env = os.environ.copy() + env['GTEST_OUTPUT'] = f"json:{tmp_results_file}" + + test_suite_names = get_cts_test_suite_names(f"{args.ctest_path}/test/conformance/") + + ## try and list all the available tests + for suite in test_suite_names: + results[suite] = {} + test_executable = f"{args.ctest_path}/bin/test-{suite}" + process = Popen([test_executable, "--gtest_list_tests"], env=env, + stdout=DEVNULL if args.quiet else None, + stderr=DEVNULL if args.quiet else None) + process.wait() + try: + with open(tmp_results_file,'r') as test_list: + all_tests = json.load(test_list) + results[suite]['expected'] = all_tests + os.remove(tmp_results_file) + except FileNotFoundError: + print(f"Could not discover tests for {suite}") + + for suite in test_suite_names: + ctest_path = f"{args.ctest_path}/test/conformance/{suite}" + process = Popen(['ctest',ctest_path], env=env, cwd=ctest_path, + stdout=DEVNULL if args.quiet else None, + stderr=DEVNULL if args.quiet else None) + process.wait() + + try: + with open(tmp_results_file, 'r') as results_file: + json_data = json.load(results_file) + results[suite]['actual'] = json_data + os.remove(tmp_results_file) + except 
FileNotFoundError: + results[suite]['actual'] = None + print('\033[91m' + f"Conformance test suite '{suite}' : likely crashed!" + '\033[0m') + + return results + +def dir_path(string): + if os.path.isdir(string): + return os.path.abspath(string) + else: + raise NotADirectoryError(string) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('ctest_path', type=dir_path, nargs='?', default='.', + help='Optional path to test directory containing ' + 'CTestTestfile. Defaults to current directory.') + parser.add_argument('-q', '--quiet', action='store_true', + help='Output only failed tests.') + args = parser.parse_args() + + raw_results = run(args) + parsed_results = parse_results(raw_results) + summarize_results(parsed_results) + +if __name__ == '__main__': + try: + main() + except KeyboardInterrupt: + exit(130) diff --git a/scripts/generate_code.py b/scripts/generate_code.py index 9c427c3780..492ff88df8 100644 --- a/scripts/generate_code.py +++ b/scripts/generate_code.py @@ -97,6 +97,7 @@ def _generate_api_cpp(incpath, srcpath, namespace, tags, version, revision, spec loc = _mako_api_h(incpath, namespace, tags, version, revision, specs, meta) loc += _mako_api_cpp(srcpath, namespace, tags, version, revision, specs, meta) loc += _mako_ddi_h(incpath, namespace, tags, version, revision, specs, meta) + loc += _mako_print_hpp(incpath, namespace, tags, version, revision, specs, meta) return loc @@ -278,11 +279,11 @@ def _mako_tracing_layer_cpp(path, namespace, tags, version, specs, meta): """ generates c/c++ files from the specification documents """ -def _mako_params_hpp(path, namespace, tags, version, specs, meta): - template = "params.hpp.mako" +def _mako_print_hpp(path, namespace, tags, version, revision, specs, meta): + template = "print.hpp.mako" fin = os.path.join(templates_dir, template) - name = "%s_params"%(namespace) + name = "%s_print"%(namespace) filename = "%s.hpp"%(name) fout = os.path.join(path, filename) @@ -291,6 +292,7 @@ def 
_mako_params_hpp(path, namespace, tags, version, specs, meta): fin, fout, name=name, ver=version, + rev=revision, namespace=namespace, tags=tags, specs=specs, @@ -401,7 +403,6 @@ def generate_common(path, section, namespace, tags, version, specs, meta): os.makedirs(layer_dstpath, exist_ok=True) loc = 0 - loc += _mako_params_hpp(layer_dstpath, namespace, tags, version, specs, meta) print("COMMON Generated %s lines of code.\n"%loc) """ diff --git a/scripts/parse_specs.py b/scripts/parse_specs.py index 07ae086efd..332af88cc7 100644 --- a/scripts/parse_specs.py +++ b/scripts/parse_specs.py @@ -18,8 +18,8 @@ import ctypes import itertools -default_version = "0.7" -all_versions = ["0.6", "0.7"] +default_version = "0.9" +all_versions = ["0.6", "0.7", "0.8", "0.9"] """ preprocess object @@ -97,7 +97,7 @@ def __validate_ordinal(d): ordinal = None if ordinal != d['ordinal']: - raise Exception("'ordinal' invalid value: '%s'"%d['ordinal']) + raise Exception("'ordinal' invalid value: '%s'"%d['ordinal']) def __validate_version(d, prefix="", base_version=default_version): if 'version' in d: @@ -333,20 +333,29 @@ def __validate_params(d, tags): if item['type'].endswith("flag_t"): raise Exception(prefix+"'type' must not be '*_flag_t': %s"%item['type']) - + if type_traits.is_pointer(item['type']) and "_handle_t" in item['type'] and "[in]" in item['desc']: if not param_traits.is_range(item): raise Exception(prefix+"handle type must include a range(start, end) as part of 'desc'") + if param_traits.is_bounds(item): + has_queue = False + for p in d['params']: + if re.match(r"hQueue$", p['name']): + has_queue = True + + if not has_queue: + raise Exception(prefix+"bounds must only be used on entry points which take a `hQueue` parameter") + ver = __validate_version(item, prefix=prefix, base_version=d_ver) if ver < max_ver: raise Exception(prefix+"'version' must be increasing: %s"%item['version']) max_ver = ver - + def __validate_union_tag(d): if d.get('tag') is None: raise 
Exception(f"{d['name']} must include a 'tag' part of the union.") - + try: if 'type' not in d: raise Exception("every document must have 'type'") @@ -466,7 +475,7 @@ def __filter_desc(d): return d flt = [] - type = d['type'] + type = d['type'] if 'enum' == type: for e in d['etors']: ver = float(e.get('version', default_version)) @@ -706,58 +715,54 @@ def _append(lst, key, val): if val and val not in rets[idx][key]: rets[idx][key].append(val) + def append_nullchecks(param, accessor: str): + if type_traits.is_pointer(param['type']): + _append(rets, "$X_RESULT_ERROR_INVALID_NULL_POINTER", "`NULL == %s`" % accessor) + + elif type_traits.is_funcptr(param['type'], meta): + _append(rets, "$X_RESULT_ERROR_INVALID_NULL_POINTER", "`NULL == %s`" % accessor) + + elif type_traits.is_handle(param['type']) and not type_traits.is_ipc_handle(item['type']): + _append(rets, "$X_RESULT_ERROR_INVALID_NULL_HANDLE", "`NULL == %s`" % accessor) + + def append_enum_checks(param, accessor: str): + ptypename = type_traits.base(param['type']) + + prefix = "`" + if param_traits.is_optional(item): + prefix = "`NULL != %s && " % item['name'] + + if re.match(r"stype", param['name']): + _append(rets, "$X_RESULT_ERROR_UNSUPPORTED_VERSION", prefix + "%s != %s`"%(re.sub(r"(\$\w)_(.*)_t.*", r"\1_STRUCTURE_TYPE_\2", typename).upper(), accessor)) + else: + if type_traits.is_flags(param['type']) and 'bit_mask' in meta['enum'][ptypename].keys(): + _append(rets, "$X_RESULT_ERROR_INVALID_ENUMERATION", prefix + "%s & %s`"%(ptypename.upper()[:-2]+ "_MASK", accessor)) + else: + _append(rets, "$X_RESULT_ERROR_INVALID_ENUMERATION", prefix + "%s < %s`"%(meta['enum'][ptypename]['max'], accessor)) + # generate results based on parameters for item in obj['params']: if param_traits.is_nocheck(item): continue if not param_traits.is_optional(item): + append_nullchecks(item, item['name']) + + if type_traits.is_enum(item['type'], meta) and not type_traits.is_pointer(item['type']): + append_enum_checks(item, item['name']) 
+ + if type_traits.is_descriptor(item['type']) or type_traits.is_properties(item['type']): typename = type_traits.base(item['type']) + # walk each entry in the desc for pointers and enums + for i, m in enumerate(meta['struct'][typename]['members']): + if param_traits.is_nocheck(m): + continue + + if not param_traits.is_optional(m): + append_nullchecks(m, "%s->%s" % (item['name'], m['name'])) - if type_traits.is_pointer(item['type']): - _append(rets, "$X_RESULT_ERROR_INVALID_NULL_POINTER", "`NULL == %s`"%item['name']) - - elif type_traits.is_funcptr(item['type'], meta): - _append(rets, "$X_RESULT_ERROR_INVALID_NULL_POINTER", "`NULL == %s`"%item['name']) - - elif type_traits.is_handle(item['type']) and not type_traits.is_ipc_handle(item['type']): - _append(rets, "$X_RESULT_ERROR_INVALID_NULL_HANDLE", "`NULL == %s`"%item['name']) - - elif type_traits.is_enum(item['type'], meta): - if type_traits.is_flags(item['type']) and 'bit_mask' in meta['enum'][typename].keys(): - _append(rets, "$X_RESULT_ERROR_INVALID_ENUMERATION", "`%s & %s`"%(typename.upper()[:-2]+ "_MASK", item['name'])) - else: - _append(rets, "$X_RESULT_ERROR_INVALID_ENUMERATION", "`%s < %s`"%(meta['enum'][typename]['max'], item['name'])) - - if type_traits.is_descriptor(item['type']): - # walk each entry in the desc for pointers and enums - for i, m in enumerate(meta['struct'][typename]['members']): - if param_traits.is_nocheck(m): - continue - mtypename = type_traits.base(m['type']) - - if type_traits.is_pointer(m['type']) and not param_traits.is_optional({'desc': m['desc']}): - _append(rets, - "$X_RESULT_ERROR_INVALID_NULL_POINTER", - "`NULL == %s->%s`"%(item['name'], m['name'])) - - elif type_traits.is_enum(m['type'], meta): - if re.match(r"stype", m['name']): - _append(rets, "$X_RESULT_ERROR_UNSUPPORTED_VERSION", "`%s != %s->stype`"%(re.sub(r"(\$\w)_(.*)_t.*", r"\1_STRUCTURE_TYPE_\2", typename).upper(), item['name'])) - else: - if type_traits.is_flags(m['type']) and 'bit_mask' in 
meta['enum'][mtypename].keys(): - _append(rets, "$X_RESULT_ERROR_INVALID_ENUMERATION", "`%s & %s->%s`"%(mtypename.upper()[:-2]+ "_MASK", item['name'], m['name'])) - else: - _append(rets, "$X_RESULT_ERROR_INVALID_ENUMERATION", "`%s < %s->%s`"%(meta['enum'][mtypename]['max'], item['name'], m['name'])) - - elif type_traits.is_properties(item['type']): - # walk each entry in the properties - for i, m in enumerate(meta['struct'][typename]['members']): - if param_traits.is_nocheck(m): - continue - if type_traits.is_enum(m['type'], meta): - if re.match(r"stype", m['name']): - _append(rets, "$X_RESULT_ERROR_UNSUPPORTED_VERSION", "`%s != %s->stype`"%(re.sub(r"(\$\w)_(.*)_t.*", r"\1_STRUCTURE_TYPE_\2", typename).upper(), item['name'])) + if type_traits.is_enum(m['type'], meta) and not type_traits.is_pointer(m['type']): + append_enum_checks(m, "%s->%s" % (item['name'], m['name'])) # finally, append all user entries for item in obj.get('returns', []): @@ -823,7 +828,7 @@ def _refresh_enum_meta(obj, meta): ## remove the existing meta records if obj.get('class'): meta['class'][obj['class']]['enum'].remove(obj['name']) - + if meta['enum'].get(obj['name']): del meta['enum'][obj['name']] ## re-generate meta @@ -851,13 +856,13 @@ def _extend_enums(enum_extensions, specs, meta): if not _validate_ext_enum_range(extension, matching_enum): raise Exception(f"Invalid enum values.") matching_enum['etors'].extend(extension['etors']) - + _refresh_enum_meta(matching_enum, meta) ## Sort the etors value = -1 def sort_etors(x): - nonlocal value + nonlocal value value = _get_etor_value(x.get('value'), value) return value matching_enum['etors'] = sorted(matching_enum['etors'], key=sort_etors) @@ -872,6 +877,7 @@ def parse(section, version, tags, meta, ref): specs = [] files = util.findFiles(path, "*.yml") + files.sort(key = lambda f: 0 if f.endswith('common.yml') else 1) registry = [f for f in files if f.endswith('registry.yml')][0] enum_extensions = [] diff --git a/scripts/templates/api.h.mako 
b/scripts/templates/api.h.mako index 74ba84beeb..41d6d8f456 100644 --- a/scripts/templates/api.h.mako +++ b/scripts/templates/api.h.mako @@ -151,6 +151,7 @@ typedef struct ${th.subt(n, tags, obj['name'])}_ *${th.subt(n, tags, obj['name'] #endif %for tbl in th.get_pfncbtables(specs, meta, n, tags): %for obj in tbl['functions']: +%if obj['params']: /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for ${th.make_func_name(n, tags, obj)} /// @details Each entry is a pointer to the parameter passed to the function; @@ -167,6 +168,7 @@ typedef struct ${th.make_pfncb_param_type(n, tags, obj)} %if 'condition' in obj: #endif // ${th.subt(n, tags, obj['condition'])} %endif +%endif %endfor %endfor diff --git a/scripts/templates/api.py.mako b/scripts/templates/api.py.mako index 35a2fd6d27..7815f2cf53 100644 --- a/scripts/templates/api.py.mako +++ b/scripts/templates/api.py.mako @@ -175,7 +175,7 @@ class ${N}_DDI: self.__dditable = ${n}_dditable_t() # initialize the UR - self.__dll.${x}Init(0, 0) + self.__dll.${x}LoaderInit(0, 0) %for tbl in tables: # call driver to get function pointers diff --git a/scripts/templates/api_listing.mako b/scripts/templates/api_listing.mako index 722a803915..252c5ee887 100644 --- a/scripts/templates/api_listing.mako +++ b/scripts/templates/api_listing.mako @@ -115,7 +115,7 @@ ${title} ## ------------------------- <%isempty = True%> %for obj in objects: -%if re.match(r"typedef", obj['type']): +%if re.match(r"typedef", obj['type']) or re.match(r"fptr_typedef", obj['type']): %if isempty: # only display section title if there is content. %if needstitle: <%needstitle = False%> @@ -245,7 +245,7 @@ ${th.make_type_name(n, tags, obj)} ## ------------------------- <%isempty = True%> %for obj in objects: -%if re.match(r"typedef", obj['type']): +%if re.match(r"typedef", obj['type']) or re.match(r"fptr_typedef", obj['type']): %if isempty: # only display section title if there is content. 
${title} Typedefs -------------------------------------------------------------------------------- diff --git a/scripts/templates/helper.py b/scripts/templates/helper.py index 2b283b8119..928db1675c 100644 --- a/scripts/templates/helper.py +++ b/scripts/templates/helper.py @@ -105,6 +105,7 @@ class type_traits: RE_DESC = r"(.*)desc_t.*" RE_PROPS = r"(.*)properties_t.*" RE_FLAGS = r"(.*)flags_t" + RE_ARRAY = r"(.*)\[([1-9][0-9]*)\]" @staticmethod def base(name): @@ -217,6 +218,29 @@ def find_class_name(name, meta): except: return None + @classmethod + def is_array(cls, name): + try: + return True if re.match(cls.RE_ARRAY, name) else False + except: + return False + + @classmethod + def get_array_length(cls, name): + if not cls.is_array(name): + raise Exception("Cannot find array length of non-array type.") + + match = re.match(cls.RE_ARRAY, name) + return match.groups()[1] + + @classmethod + def get_array_element_type(cls, name): + if not cls.is_array(name): + raise Exception("Cannot find array type of non-array type.") + + match = re.match(cls.RE_ARRAY, name) + return match.groups()[0] + """ Extracts traits from a value name """ @@ -332,6 +356,7 @@ class param_traits: RE_RELEASE = r".*\[release\].*" RE_TYPENAME = r".*\[typename\((.+),\s(.+)\)\].*" RE_TAGGED = r".*\[tagged_by\((.+)\)].*" + RE_BOUNDS = r".*\[bounds\((.+),\s*(.+)\)].*" @classmethod def is_mbz(cls, item): @@ -388,6 +413,13 @@ def is_tagged(cls, item): return True if re.match(cls.RE_TAGGED, item['desc']) else False except: return False + + @classmethod + def is_bounds(cls, item): + try: + return True if re.match(cls.RE_BOUNDS, item['desc']) else False + except: + return False @classmethod def tagged_member(cls, item): @@ -433,6 +465,22 @@ def typename_size(cls, item): else: return None + @classmethod + def bounds_offset(cls, item): + match = re.match(cls.RE_BOUNDS, item['desc']) + if match: + return match.group(1) + else: + return None + + @classmethod + def bounds_size(cls, item): + match = 
re.match(cls.RE_BOUNDS, item['desc']) + if match: + return match.group(2) + else: + return None + """ Extracts traits from a function object """ @@ -729,7 +777,10 @@ def make_etor_lines(namespace, tags, obj, py=False, meta=None): returns c/c++ name of any type """ def _get_type_name(namespace, tags, obj, item): - name = subt(namespace, tags, item['type'],) + type = item['type'] + if type_traits.is_array(type): + type = type_traits.get_array_element_type(type) + name = subt(namespace, tags, type,) return name """ @@ -763,9 +814,9 @@ def get_ctype_name(namespace, tags, item): while type_traits.is_pointer(name): name = "POINTER(%s)"%_remove_ptr(name) - if 'name' in item and value_traits.is_array(item['name']): - length = subt(namespace, tags, value_traits.get_array_length(item['name'])) - name = "%s * %s"%(name, length) + if 'name' in item and type_traits.is_array(item['type']): + length = subt(namespace, tags, type_traits.get_array_length(item['type'])) + name = "%s * %s"%(type_traits.get_array_element_type(name), length) return name @@ -804,7 +855,8 @@ def make_member_lines(namespace, tags, obj, prefix="", py=False, meta=None): delim = "," if i < (len(obj['members'])-1) else "" prologue = "(\"%s\", %s)%s"%(name, tname, delim) else: - prologue = "%s %s;"%(tname, name) + array_suffix = f"[{type_traits.get_array_length(item['type'])}]" if type_traits.is_array(item['type']) else "" + prologue = "%s %s %s;"%(tname, name, array_suffix) comment_style = "##" if py else "///<" ws_count = 64 if py else 48 @@ -1013,7 +1065,35 @@ def make_pfncb_param_type(namespace, tags, obj): """ Public: - returns a dict of auto-generated c++ parameter validation checks + returns an appropriate bounds helper function call for an entry point + parameter with the [bounds] tag +""" +def get_bounds_check(param, bounds_error): + # Images need their own helper, since function signature wise they would be + # identical to buffer rect + bounds_function = 'boundsImage' if 'image' in 
param['name'].lower() else 'bounds' + bounds_check = "auto {0} = {1}({2}, {3}, {4})".format( + bounds_error, + bounds_function, + param["name"], + param_traits.bounds_offset(param), + param_traits.bounds_size(param), + ) + bounds_check += '; {0} != UR_RESULT_SUCCESS'.format(bounds_error) + + # USM bounds checks need the queue handle parameter to be able to use the + # GetMemAllocInfo entry point + if type_traits.is_pointer(param['type']): + # If no `hQueue` parameter exists that should have been caught at spec + # generation. + return re.sub(r'bounds\(', 'bounds(hQueue, ', bounds_check) + + return bounds_check + +""" +Public: + returns a dict of auto-generated c++ parameter validation checks for the + given function (specified by `obj`) """ def make_param_checks(namespace, tags, obj, cpp=False, meta=None): checks = {} @@ -1026,6 +1106,13 @@ def make_param_checks(namespace, tags, obj, cpp=False, meta=None): if key not in checks: checks[key] = [] checks[key].append(subt(namespace, tags, code.group(1), False, cpp)) + + for p in obj.get('params', []): + if param_traits.is_bounds(p): + if 'boundsError' not in checks: + checks['boundsError'] = [] + checks['boundsError'].append(get_bounds_check(p, 'boundsError')) + return checks """ @@ -1300,3 +1387,14 @@ def get_create_retain_release_functions(specs, namespace, tags): ) return {"create": create_funcs, "retain": retain_funcs, "release": release_funcs} + + +def get_event_wait_list_functions(specs, namespace, tags): + funcs = [] + for s in specs: + for obj in s['objects']: + if re.match(r"function", obj['type']): + if any(x['name'] == 'phEventWaitList' for x in obj['params']) and any( + x['name'] == 'numEventsInWaitList' for x in obj['params']): + funcs.append(make_func_name(namespace, tags, obj)) + return funcs diff --git a/scripts/templates/index.rst.mako b/scripts/templates/index.rst.mako index 8a53ba0427..1d5ba6a9b0 100644 --- a/scripts/templates/index.rst.mako +++ b/scripts/templates/index.rst.mako @@ -14,5 +14,7 @@ 
core/INTRO.rst core/PROG.rst core/CONTRIB.rst + core/CUDA.rst + core/HIP.rst exp-features.rst api.rst diff --git a/scripts/templates/ldrddi.cpp.mako b/scripts/templates/ldrddi.cpp.mako index 0498ba00dc..f79f2aca3b 100644 --- a/scripts/templates/ldrddi.cpp.mako +++ b/scripts/templates/ldrddi.cpp.mako @@ -51,22 +51,7 @@ namespace ur_loader add_local = False %> - %if re.match(r"Init", obj['name']): - for( auto& platform : context->platforms ) - { - if(platform.initStatus != ${X}_RESULT_SUCCESS) - continue; - platform.initStatus = platform.dditable.${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}( ${", ".join(th.make_param_lines(n, tags, obj, format=["name"]))} ); - } - - %elif re.match(r"\w+TearDown$", th.make_func_name(n, tags, obj)): - - for( auto& platform : context->platforms ) - { - platform.dditable.${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}( ${", ".join(th.make_param_lines(n, tags, obj, format=["name"]))} ); - } - - %elif re.match(r"\w+AdapterGet$", th.make_func_name(n, tags, obj)): + %if re.match(r"\w+AdapterGet$", th.make_func_name(n, tags, obj)): size_t adapterIndex = 0; if( nullptr != ${obj['params'][1]['name']} && ${obj['params'][0]['name']} !=0) @@ -86,6 +71,9 @@ namespace ur_loader break; } adapterIndex++; + if (adapterIndex == NumEntries) { + break; + } } } diff --git a/scripts/templates/libapi.cpp.mako b/scripts/templates/libapi.cpp.mako index d269d62241..6fe1f3992b 100644 --- a/scripts/templates/libapi.cpp.mako +++ b/scripts/templates/libapi.cpp.mako @@ -56,19 +56,27 @@ ${th.make_func_name(n, tags, obj)}( %endfor ) try { -%if th.obj_traits.is_loader_only(obj): - return ur_lib::${th.make_func_name(n, tags, obj)}(${", ".join(th.make_param_lines(n, tags, obj, format=["name"]))} ); -%else: %if re.match("Init", obj['name']): + <% + param_checks=th.make_param_checks(n, tags, obj, meta=meta).items() + %> + %for key, values in param_checks: + %for val in values: + if( ${val} ) + return ${key}; + + 
%endfor + %endfor + static ${x}_result_t result = ${X}_RESULT_SUCCESS; std::call_once(${x}_lib::context->initOnce, [device_flags, hLoaderConfig]() { result = ${x}_lib::context->Init(device_flags, hLoaderConfig); }); - if( ${X}_RESULT_SUCCESS != result ) - return result; - -%endif + return result; +%elif th.obj_traits.is_loader_only(obj): + return ur_lib::${th.make_func_name(n, tags, obj)}(${", ".join(th.make_param_lines(n, tags, obj, format=["name"]))} ); +%else: auto ${th.make_pfn_name(n, tags, obj)} = ${x}_lib::context->${n}DdiTable.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}; if( nullptr == ${th.make_pfn_name(n, tags, obj)} ) return ${X}_RESULT_ERROR_UNINITIALIZED; diff --git a/scripts/templates/libddi.cpp.mako b/scripts/templates/libddi.cpp.mako index de73cc2fc7..eae178324c 100644 --- a/scripts/templates/libddi.cpp.mako +++ b/scripts/templates/libddi.cpp.mako @@ -28,7 +28,7 @@ namespace ${x}_lib /////////////////////////////////////////////////////////////////////////////// - __${x}dlllocal ${x}_result_t context_t::${n}Init() + __${x}dlllocal ${x}_result_t context_t::${n}LoaderInit() { ${x}_result_t result = ${X}_RESULT_SUCCESS; diff --git a/scripts/templates/params.hpp.mako b/scripts/templates/print.hpp.mako similarity index 54% rename from scripts/templates/params.hpp.mako rename to scripts/templates/print.hpp.mako index 863c3d37ea..f79a6ab87a 100644 --- a/scripts/templates/params.hpp.mako +++ b/scripts/templates/print.hpp.mako @@ -16,26 +16,30 @@ from templates import helper as th * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * * @file ${name}.hpp + * @version v${ver}-r${rev} * */ -#ifndef ${X}_PARAMS_HPP -#define ${X}_PARAMS_HPP 1 +#ifndef ${X}_PRINT_HPP +#define ${X}_PRINT_HPP 1 #include "${x}_api.h" #include #include +## Mako helper functions ###################################################### <%def name="member(iname, itype, loop)"> %if iname == "pNext": - ${x}_params::serializeStruct(os, ${caller.body()}); + 
${x}::details::printStruct(os, ${caller.body()}); %elif th.type_traits.is_flags(itype): - ${x}_params::serializeFlag<${th.type_traits.get_flag_type(itype)}>(os, ${caller.body()}); + ${x}::details::printFlag<${th.type_traits.get_flag_type(itype)}>(os, ${caller.body()}); %elif not loop and th.type_traits.is_pointer(itype): - ${x}_params::serializePtr(os, ${caller.body()}); + ${x}::details::printPtr(os, ${caller.body()}); %elif loop and th.type_traits.is_pointer_to_pointer(itype): - ${x}_params::serializePtr(os, ${caller.body()}); + ${x}::details::printPtr(os, ${caller.body()}); %elif th.type_traits.is_handle(itype): - ${x}_params::serializePtr(os, ${caller.body()}); + ${x}::details::printPtr(os, ${caller.body()}); + %elif iname and iname.startswith("pfn"): + os << reinterpret_cast(${caller.body()}); %else: os << ${caller.body()}; %endif @@ -81,10 +85,21 @@ def findMemberType(_item): os << "}"; %elif findMemberType(item) is not None and findMemberType(item)['type'] == "union": os << ".${iname} = "; - ${x}_params::serializeUnion(os, ${deref}(params${access}${item['name']}), params${access}${th.param_traits.tagged_member(item)}); + ${x}::details::printUnion(os, ${deref}(params${access}${item['name']}), params${access}${th.param_traits.tagged_member(item)}); + %elif th.type_traits.is_array(item['type']): + os << ".${iname} = {"; + for(auto i = 0; i < ${th.type_traits.get_array_length(item['type'])}; i++){ + if(i != 0){ + os << ", "; + } + <%call expr="member(iname, itype, True)"> + ${deref}(params${access}${item['name']}[i]) + + } + os << "}"; %elif typename is not None: os << ".${iname} = "; - ${x}_params::serializeTagged(os, ${deref}(params${access}${pname}), ${deref}(params${access}${prefix}${typename}), ${deref}(params${access}${prefix}${typename_size})); + ${x}::details::printTagged(os, ${deref}(params${access}${pname}), ${deref}(params${access}${prefix}${typename}), ${deref}(params${access}${prefix}${typename_size})); %else: os << ".${iname} = "; <%call 
expr="member(iname, itype, False)"> @@ -93,7 +108,8 @@ def findMemberType(_item): %endif -namespace ${x}_params { +## API functions declarations ################################################# +namespace ${x}::details { template struct is_handle : std::false_type {}; %for spec in specs: %for obj in spec['objects']: @@ -104,66 +120,70 @@ template <> struct is_handle<${th.make_type_name(n, tags, obj)}> : std::true_typ %endfor template inline constexpr bool is_handle_v = is_handle::value; -template inline void serializePtr(std::ostream &os, T *ptr); -template inline void serializeFlag(std::ostream &os, uint32_t flag); -template inline void serializeTagged(std::ostream &os, const void *ptr, T value, size_t size); +template inline ${x}_result_t printPtr(std::ostream &os, const T *ptr); +template inline ${x}_result_t printFlag(std::ostream &os, uint32_t flag); +template inline ${x}_result_t printTagged(std::ostream &os, const void *ptr, T value, size_t size); %for spec in specs: %for obj in spec['objects']: ## ENUM ####################################################################### %if re.match(r"enum", obj['type']): %if obj.get('typed_etors', False) is True: - template <> inline void serializeTagged(std::ostream &os, const void *ptr, ${th.make_enum_name(n, tags, obj)} value, size_t size); + template <> inline ${x}_result_t printTagged(std::ostream &os, const void *ptr, ${th.make_enum_name(n, tags, obj)} value, size_t size); %elif "structure_type" in obj['name']: - inline void serializeStruct(std::ostream &os, const void *ptr); + inline ${x}_result_t printStruct(std::ostream &os, const void *ptr); %endif %endif +## UNION ###################################################################### %if re.match(r"union", obj['type']) and obj['name']: <% tag = [_obj for _s in specs for _obj in _s['objects'] if _obj['name'] == obj['tag']][0] %> - inline void serializeUnion( + inline ${x}_result_t printUnion( std::ostream &os, const ${obj['type']} ${th.make_type_name(n, tags, 
obj)} params, const ${tag['type']} ${th.make_type_name(n, tags, tag)} tag ); %endif - +## FLAG ####################################################################### %if th.type_traits.is_flags(obj['name']): - template<> inline void serializeFlag<${th.make_enum_name(n, tags, obj)}>(std::ostream &os, uint32_t flag); + template<> inline ${x}_result_t printFlag<${th.make_enum_name(n, tags, obj)}>(std::ostream &os, uint32_t flag); %endif %endfor # obj in spec['objects'] %endfor -} // namespace ${x}_params +} // namespace ${x}::details %for spec in specs: %for obj in spec['objects']: -## ENUM ####################################################################### %if re.match(r"enum", obj['type']): - inline std::ostream &operator<<(std::ostream &os, enum ${th.make_enum_name(n, tags, obj)} value); + inline std::ostream &operator<<(std::ostream &os, ${th.make_enum_name(n, tags, obj)} value); %elif re.match(r"struct", obj['type']): - inline std::ostream &operator<<(std::ostream &os, const ${obj['type']} ${th.make_type_name(n, tags, obj)} params); + inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const ${obj['type']} ${th.make_type_name(n, tags, obj)} params); %endif %endfor # obj in spec['objects'] %endfor +## API functions definitions ################################################## %for spec in specs: %for obj in spec['objects']: ## ENUM ####################################################################### %if re.match(r"enum", obj['type']): + /////////////////////////////////////////////////////////////////////////////// + /// @brief Print operator for the ${th.make_enum_name(n, tags, obj)} type + /// @returns + /// std::ostream & %if "api_version" in obj['name']: - inline std::ostream &operator<<(std::ostream &os, enum ${th.make_enum_name(n, tags, obj)} value) { + inline std::ostream &operator<<(std::ostream &os, ${th.make_enum_name(n, tags, obj)} value) { os << UR_MAJOR_VERSION(value) << "." 
<< UR_MINOR_VERSION(value); return os; } %else: - inline std::ostream &operator<<(std::ostream &os, enum ${th.make_enum_name(n, tags, obj)} value) { + inline std::ostream &operator<<(std::ostream &os, ${th.make_enum_name(n, tags, obj)} value) { switch (value) { %for n, item in enumerate(obj['etors']): <% ename = th.make_etor_name(n, tags, obj['name'], item['name']) - %> - case ${ename}: + %>case ${ename}: os << "${ename}"; break; %endfor @@ -175,12 +195,13 @@ template inline void serializeTagged(std::ostream &os, const void * } %endif %if obj.get('typed_etors', False) is True: - namespace ${x}_params { + namespace ${x}::details { + /////////////////////////////////////////////////////////////////////////////// + /// @brief Print ${th.make_enum_name(n, tags, obj)} enum value template <> - inline void serializeTagged(std::ostream &os, const void *ptr, ${th.make_enum_name(n, tags, obj)} value, size_t size) { + inline ${x}_result_t printTagged(std::ostream &os, const void *ptr, ${th.make_enum_name(n, tags, obj)} value, size_t size) { if (ptr == NULL) { - serializePtr(os, ptr); - return; + return printPtr(os, ptr); } switch (value) { @@ -188,13 +209,16 @@ template inline void serializeTagged(std::ostream &os, const void * <% ename = th.make_etor_name(n, tags, obj['name'], item['name']) vtype = th.etor_get_associated_type(n, tags, item) - %> - case ${ename}: { + %>case ${ename}: { %if th.value_traits.is_array(vtype): <% atype = th.value_traits.get_array_name(vtype) %> + %if 'void' in atype: + const ${atype} const *tptr = (const ${atype} const*)ptr; + %else: const ${atype} *tptr = (const ${atype} *)ptr; + %endif %if "char" in atype: ## print char* arrays as simple NULL-terminated strings - serializePtr(os, tptr); + printPtr(os, tptr); %else: os << "{"; size_t nelems = size / sizeof(${atype}); @@ -209,12 +233,16 @@ template inline void serializeTagged(std::ostream &os, const void * os << "}"; %endif %else: + %if 'void' in vtype: + const ${vtype} const *tptr = (const 
${vtype} const *)ptr; + %else: const ${vtype} *tptr = (const ${vtype} *)ptr; + %endif if (sizeof(${vtype}) > size) { os << "invalid size (is: " << size << ", expected: >=" << sizeof(${vtype}) << ")"; - return; + return ${X}_RESULT_ERROR_INVALID_SIZE; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; <%call expr="member(tptr, vtype, False)"> *tptr @@ -224,20 +252,23 @@ template inline void serializeTagged(std::ostream &os, const void * %endfor default: os << "unknown enumerator"; - break; + return ${X}_RESULT_ERROR_INVALID_ENUMERATION; } + return ${X}_RESULT_SUCCESS; } - } + } // namespace ${x}::details + %elif "structure_type" in obj['name']: - namespace ${x}_params { - inline void serializeStruct(std::ostream &os, const void *ptr) { + namespace ${x}::details { + /////////////////////////////////////////////////////////////////////////////// + /// @brief Print ${th.make_enum_name(n, tags, obj)} struct + inline ${x}_result_t printStruct(std::ostream &os, const void *ptr) { if (ptr == NULL) { - ${x}_params::serializePtr(os, ptr); - return; + return printPtr(os, ptr); } ## structure type enum value must be first - enum ${th.make_enum_name(n, tags, obj)} *value = (enum ${th.make_enum_name(n, tags, obj)} *)ptr; + const enum ${th.make_enum_name(n, tags, obj)} *value = (const enum ${th.make_enum_name(n, tags, obj)} *)ptr; switch (*value) { %for n, item in enumerate(obj['etors']): <% @@ -245,54 +276,62 @@ template inline void serializeTagged(std::ostream &os, const void * %> case ${ename}: { const ${th.subt(n, tags, item['desc'])} *pstruct = (const ${th.subt(n, tags, item['desc'])} *)ptr; - ${x}_params::serializePtr(os, pstruct); + printPtr(os, pstruct); } break; %endfor default: os << "unknown enumerator"; - break; + return ${X}_RESULT_ERROR_INVALID_ENUMERATION; } + return ${X}_RESULT_SUCCESS; } - } // namespace ${x}_params + } // namespace ${x}::details %endif -%if th.type_traits.is_flags(obj['name']): -namespace ${x}_params { + %if 
th.type_traits.is_flags(obj['name']): -template<> -inline void serializeFlag<${th.make_enum_name(n, tags, obj)}>(std::ostream &os, uint32_t flag) { - uint32_t val = flag; - bool first = true; - %for n, item in enumerate(obj['etors']): - <% - ename = th.make_etor_name(n, tags, obj['name'], item['name']) - %> - if ((val & ${ename}) == (uint32_t)${ename}) { - ## toggle the bits to avoid printing overlapping values - ## instead of e.g., FLAG_FOO | FLAG_BAR | FLAG_ALL, this will just - ## print FLAG_FOO | FLAG_BAR (or just FLAG_ALL, depending on order). - val ^= (uint32_t)${ename}; + namespace ${x}::details { + /////////////////////////////////////////////////////////////////////////////// + /// @brief Print ${th.make_enum_name(n, tags, obj)} flag + template<> + inline ${x}_result_t printFlag<${th.make_enum_name(n, tags, obj)}>(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + %for n, item in enumerate(obj['etors']): + <% + ename = th.make_etor_name(n, tags, obj['name'], item['name']) + %> + if ((val & ${ename}) == (uint32_t)${ename}) { + ## toggle the bits to avoid printing overlapping values + ## instead of e.g., FLAG_FOO | FLAG_BAR | FLAG_ALL, this will just + ## print FLAG_FOO | FLAG_BAR (or just FLAG_ALL, depending on order). 
+ val ^= (uint32_t)${ename}; + if (!first) { + os << " | "; + } else { + first = false; + } + os << ${ename}; + } + %endfor + if (val != 0) { + std::bitset<32> bits(val); if (!first) { os << " | "; - } else { - first = false; } - os << ${ename}; - } - %endfor - if (val != 0) { - std::bitset<32> bits(val); - if (!first) { - os << " | "; + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; } - os << "unknown bit flags " << bits; - } else if (first) { - os << "0"; + return ${X}_RESULT_SUCCESS; } -} -} // namespace ${x}_params -%endif -## STRUCT/UNION ############################################################### + } // namespace ${x}::details + %endif +## STRUCT ##################################################################### %elif re.match(r"struct", obj['type']): +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ${th.make_type_name(n, tags, obj)} type +/// @returns +/// std::ostream & inline std::ostream &operator<<(std::ostream &os, const ${obj['type']} ${th.make_type_name(n, tags, obj)} params) { os << "(${obj['type']} ${th.make_type_name(n, tags, obj)}){"; <% @@ -309,9 +348,13 @@ inline std::ostream &operator<<(std::ostream &os, const ${obj['type']} ${th.make os << "}"; return os; } +## UNION ###################################################################### %elif re.match(r"union", obj['type']) and obj['name']: +namespace ${x}::details { <% tag = findUnionTag(obj) %> -inline void ${x}_params::serializeUnion( + /////////////////////////////////////////////////////////////////////////////// + // @brief Print ${th.make_type_name(n, tags, obj)} union + inline ${x}_result_t printUnion( std::ostream &os, const ${obj['type']} ${th.make_type_name(n, tags, obj)} params, const ${tag['type']} ${th.make_type_name(n, tags, tag)} tag @@ -332,10 +375,12 @@ for item in obj['members']: %endfor default: os << ""; - break; + return ${X}_RESULT_ERROR_INVALID_ENUMERATION; } os << "}"; + 
return ${X}_RESULT_SUCCESS; } +} // namespace ${x}::details %endif %endfor # obj in spec['objects'] %endfor @@ -343,7 +388,11 @@ for item in obj['members']: %for tbl in th.get_pfncbtables(specs, meta, n, tags): %for obj in tbl['functions']: -inline std::ostream &operator<<(std::ostream &os, const struct ${th.make_pfncb_param_type(n, tags, obj)} *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ${th.make_pfncb_param_type(n, tags, obj)} type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ${th.make_pfncb_param_type(n, tags, obj)} *params) { <% params_dict = dict() for item in obj['params']: @@ -360,30 +409,46 @@ inline std::ostream &operator<<(std::ostream &os, const struct ${th.make_pfncb_p %endfor %endfor -namespace ${x}_params { - -template inline void serializePtr(std::ostream &os, T *ptr) { +namespace ${x}::details { +/////////////////////////////////////////////////////////////////////////////// +// @brief Print pointer value +template inline ${x}_result_t printPtr(std::ostream &os, const T *ptr) { if (ptr == nullptr) { os << "nullptr"; } else if constexpr (std::is_pointer_v) { - os << (void *)(ptr) << " ("; - serializePtr(os, *ptr); + os << (const void *)(ptr) << " ("; + printPtr(os, *ptr); os << ")"; } else if constexpr (std::is_void_v || is_handle_v) { - os << (void *)ptr; + os << (const void *)ptr; } else if constexpr (std::is_same_v, char>) { - os << (void *)(ptr) << " ("; + os << (const void *)(ptr) << " ("; os << ptr; os << ")"; } else { - os << (void *)(ptr) << " ("; + os << (const void *)(ptr) << " ("; os << *ptr; os << ")"; } + + return ${X}_RESULT_SUCCESS; } +} // namespace ${x}::details + +namespace ${x}::extras { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print function parameters +/// @returns +/// - ::${X}_RESULT_SUCCESS +/// - 
::${X}_RESULT_ERROR_INVALID_ENUMERATION +/// - ::${X}_RESULT_ERROR_INVALID_NULL_POINTER +/// - `NULL == params` +inline ${x}_result_t ${X}_APICALL printFunctionParams(std::ostream &os, ur_function_t function, const void *params) { + if (!params) { + return ${X}_RESULT_ERROR_INVALID_NULL_POINTER; + } -inline int serializeFunctionParams(std::ostream &os, uint32_t function, const void *params) { - switch((enum ${x}_function_t)function) { + switch(function) { %for tbl in th.get_pfncbtables(specs, meta, n, tags): %for obj in tbl['functions']: case ${th.make_func_etor(n, tags, obj)}: { @@ -391,10 +456,10 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, const vo } break; %endfor %endfor - default: return -1; + default: return ${X}_RESULT_ERROR_INVALID_ENUMERATION; } - return 0; + return ${X}_RESULT_SUCCESS; } -} // namespace ur_params +} // namespace ${x}::extras -#endif /* ${X}_PARAMS_HPP */ +#endif /* ${X}_PRINT_HPP */ diff --git a/scripts/templates/trcddi.cpp.mako b/scripts/templates/trcddi.cpp.mako index 9a2eb3e319..2ace43072b 100644 --- a/scripts/templates/trcddi.cpp.mako +++ b/scripts/templates/trcddi.cpp.mako @@ -104,13 +104,16 @@ namespace ur_tracing_layer ${x}_result_t context_t::init(ur_dditable_t *dditable, - const std::set &enabledLayerNames) { + const std::set &enabledLayerNames, + codeloc_data codelocData) { ${x}_result_t result = ${X}_RESULT_SUCCESS; if(!enabledLayerNames.count(name)) { return result; } + ur_tracing_layer::context.codelocData = codelocData; + %for tbl in th.get_pfntables(specs, meta, n, tags): if( ${X}_RESULT_SUCCESS == result ) { diff --git a/scripts/templates/valddi.cpp.mako b/scripts/templates/valddi.cpp.mako index 862c8b81a5..2e9bac3200 100644 --- a/scripts/templates/valddi.cpp.mako +++ b/scripts/templates/valddi.cpp.mako @@ -60,11 +60,36 @@ namespace ur_validation_layer %endfor %endfor + %if func_name in th.get_event_wait_list_functions(specs, n, tags): + if (phEventWaitList != NULL && numEventsInWaitList > 0) 
{ + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + %endif + } ${x}_result_t result = ${th.make_pfn_name(n, tags, obj)}( ${", ".join(th.make_param_lines(n, tags, obj, format=["name"]))} ); - %if func_name in create_retain_release_funcs["create"]: + %if func_name == n + "AdapterRelease": + if( context.enableLeakChecking && result == UR_RESULT_SUCCESS ) + { + refCountContext.decrementRefCount(${object_param}, true); + } + %elif func_name == n + "AdapterRetain": + if( context.enableLeakChecking && result == UR_RESULT_SUCCESS ) + { + refCountContext.incrementRefCount(${object_param}, true); + } + %elif func_name == n + "AdapterGet": + if( context.enableLeakChecking && phAdapters && result == UR_RESULT_SUCCESS ) + { + refCountContext.createOrIncrementRefCount(*phAdapters, true); + } + %elif func_name in create_retain_release_funcs["create"]: if( context.enableLeakChecking && result == UR_RESULT_SUCCESS ) { refCountContext.createRefCount(*${object_param}); @@ -79,12 +104,6 @@ namespace ur_validation_layer { refCountContext.decrementRefCount(${object_param}); } - %elif func_name == n + "TearDown": - if ( context.enableLeakChecking ) - { - refCountContext.logInvalidReferences(); - refCountContext.clear(); - } %endif return result; @@ -141,9 +160,10 @@ namespace ur_validation_layer %endfor ${x}_result_t context_t::init(ur_dditable_t *dditable, - const std::set &enabledLayerNames) { + const std::set &enabledLayerNames, + codeloc_data) { ${x}_result_t result = ${X}_RESULT_SUCCESS; - + if (enabledLayerNames.count(nameFullValidation)) { enableParameterValidation = true; enableLeakChecking = true; @@ -170,4 +190,14 @@ namespace ur_validation_layer return result; } + ${x}_result_t context_t::tearDown() { + ${x}_result_t result = ${X}_RESULT_SUCCESS; + + if (enableLeakChecking) { + refCountContext.logInvalidReferences(); + refCountContext.clear(); + } + return result; + } + } 
// namespace ur_validation_layer diff --git a/source/adapters/CMakeLists.txt b/source/adapters/CMakeLists.txt index 3d7700da4a..23e42232de 100644 --- a/source/adapters/CMakeLists.txt +++ b/source/adapters/CMakeLists.txt @@ -32,21 +32,21 @@ add_subdirectory(null) set(INTEL_LLVM_TAG nightly-2023-09-20) -if(UR_BUILD_ADAPTER_L0) +if(UR_BUILD_ADAPTER_L0 OR UR_BUILD_ADAPTER_ALL) add_subdirectory(level_zero) endif() -if(UR_BUILD_ADAPTER_CUDA) +if(UR_BUILD_ADAPTER_CUDA OR UR_BUILD_ADAPTER_ALL) add_subdirectory(cuda) endif() -if(UR_BUILD_ADAPTER_HIP) +if(UR_BUILD_ADAPTER_HIP OR UR_BUILD_ADAPTER_ALL) add_subdirectory(hip) endif() -if(UR_BUILD_ADAPTER_OPENCL) +if(UR_BUILD_ADAPTER_OPENCL OR UR_BUILD_ADAPTER_ALL) add_subdirectory(opencl) endif() -if(UR_BUILD_ADAPTER_NATIVE_CPU) +if(UR_BUILD_ADAPTER_NATIVE_CPU OR UR_BUILD_ADAPTER_ALL) add_subdirectory(native_cpu) endif() diff --git a/source/adapters/adapter.def.in b/source/adapters/adapter.def.in index 057c03a93d..3c18c78bd1 100644 --- a/source/adapters/adapter.def.in +++ b/source/adapters/adapter.def.in @@ -5,8 +5,10 @@ EXPORTS urGetCommandBufferExpProcAddrTable urGetContextProcAddrTable urGetEnqueueProcAddrTable + urGetEnqueueExpProcAddrTable urGetEventProcAddrTable urGetKernelProcAddrTable + urGetKernelExpProcAddrTable urGetMemProcAddrTable urGetPhysicalMemProcAddrTable urGetPlatformProcAddrTable diff --git a/source/adapters/adapter.map.in b/source/adapters/adapter.map.in index 114c6168bd..bb08ae7d88 100644 --- a/source/adapters/adapter.map.in +++ b/source/adapters/adapter.map.in @@ -5,8 +5,10 @@ urGetCommandBufferExpProcAddrTable; urGetContextProcAddrTable; urGetEnqueueProcAddrTable; + urGetEnqueueExpProcAddrTable; urGetEventProcAddrTable; urGetKernelProcAddrTable; + urGetKernelExpProcAddrTable; urGetMemProcAddrTable; urGetPhysicalMemProcAddrTable; urGetPlatformProcAddrTable; diff --git a/source/adapters/cuda/adapter.cpp b/source/adapters/cuda/adapter.cpp index ca80a99f68..5b897a8768 100644 --- 
a/source/adapters/cuda/adapter.cpp +++ b/source/adapters/cuda/adapter.cpp @@ -22,15 +22,6 @@ struct ur_adapter_handle_t_ { ur_adapter_handle_t_ adapter{}; -UR_APIEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t, - ur_loader_config_handle_t) { - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urTearDown(void *) { - return UR_RESULT_SUCCESS; -} - UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index ad46884bf1..24a5d9497c 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -140,8 +140,19 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { try { + const unsigned long long flags = 0; +#if CUDA_VERSION >= 12000 UR_CHECK_ERROR(cuGraphInstantiate(&hCommandBuffer->CudaGraphExec, - hCommandBuffer->CudaGraph, 0)); + hCommandBuffer->CudaGraph, flags)); +#elif CUDA_VERSION >= 11040 + UR_CHECK_ERROR(cuGraphInstantiateWithFlags( + &hCommandBuffer->CudaGraphExec, hCommandBuffer->CudaGraph, flags)); +#else + // Cannot use flags + UR_CHECK_ERROR(cuGraphInstantiate(&hCommandBuffer->CudaGraphExec, + hCommandBuffer->CudaGraph, nullptr, + nullptr, 0)); +#endif } catch (...) 
{ return UR_RESULT_ERROR_UNKNOWN; } @@ -236,7 +247,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return Result; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, size_t size, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, @@ -270,7 +281,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( return Result; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numSyncPointsInWaitList, @@ -314,7 +325,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( return Result; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, @@ -356,7 +367,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, size_t offset, size_t size, const void *pSrc, uint32_t numSyncPointsInWaitList, @@ -394,7 +405,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +ur_result_t UR_APICALL 
urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, @@ -431,7 +442,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -473,7 +484,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index 8f52942b89..e3258f379d 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -202,8 +202,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnInit = urInit; - pDdiTable->pfnTearDown = urTearDown; pDdiTable->pfnAdapterGet = urAdapterGet; pDdiTable->pfnAdapterRelease = urAdapterRelease; pDdiTable->pfnAdapterRetain = urAdapterRetain; @@ -280,17 +278,17 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; - 
pDdiTable->pfnAppendMemcpyUSMExp = urCommandBufferAppendMemcpyUSMExp; - pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - urCommandBufferAppendMembufferCopyRectExp; - pDdiTable->pfnAppendMembufferReadExp = urCommandBufferAppendMembufferReadExp; - pDdiTable->pfnAppendMembufferReadRectExp = - urCommandBufferAppendMembufferReadRectExp; - pDdiTable->pfnAppendMembufferWriteExp = - urCommandBufferAppendMembufferWriteExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - urCommandBufferAppendMembufferWriteRectExp; + pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + urCommandBufferAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferReadExp = urCommandBufferAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + urCommandBufferAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferWriteExp = + urCommandBufferAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + urCommandBufferAppendMemBufferWriteRectExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; @@ -390,19 +388,42 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnCooperativeKernelLaunchExp = nullptr; + + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = nullptr; 
+ + return UR_RESULT_SUCCESS; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_program_exp_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; + ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } + pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; - return retVal; + + return UR_RESULT_SUCCESS; } #if defined(__cplusplus) diff --git a/source/adapters/hip/adapter.cpp b/source/adapters/hip/adapter.cpp index 662717f1bd..4691d78913 100644 --- a/source/adapters/hip/adapter.cpp +++ b/source/adapters/hip/adapter.cpp @@ -20,15 +20,6 @@ struct ur_adapter_handle_t_ { ur_adapter_handle_t_ adapter{}; -UR_APIEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t, - ur_loader_config_handle_t) { - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urTearDown(void *) { - return UR_RESULT_SUCCESS; -} - UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( uint32_t, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { if (phAdapters) { diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 3f68b88d8d..d2cd156719 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -52,7 +52,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t, void *, const void *, size_t, uint32_t, const 
ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *) { @@ -61,7 +61,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, size_t, size_t, size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *) { @@ -70,7 +70,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, size_t, size_t, size_t, size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, @@ -81,7 +81,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, size_t, size_t, const void *, uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *) { @@ -91,7 +91,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, size_t, size_t, void *, uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *) { @@ -101,7 +101,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( } UR_APIEXPORT -ur_result_t UR_APICALL 
urCommandBufferAppendMembufferWriteRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, size_t, size_t, size_t, size_t, void *, uint32_t, const ur_exp_command_buffer_sync_point_t *, @@ -112,7 +112,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, size_t, size_t, size_t, size_t, void *, uint32_t, const ur_exp_command_buffer_sync_point_t *, diff --git a/source/adapters/hip/context.cpp b/source/adapters/hip/context.cpp index 8298d513d8..73ac777edb 100644 --- a/source/adapters/hip/context.cpp +++ b/source/adapters/hip/context.cpp @@ -40,15 +40,13 @@ ur_context_handle_t_::getOwningURPool(umf_memory_pool_t *UMFPool) { UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( uint32_t DeviceCount, const ur_device_handle_t *phDevices, const ur_context_properties_t *, ur_context_handle_t *phContext) { - std::ignore = DeviceCount; - assert(DeviceCount == 1); ur_result_t RetErr = UR_RESULT_SUCCESS; std::unique_ptr ContextPtr{nullptr}; try { // Create a scoped context. 
ContextPtr = std::unique_ptr( - new ur_context_handle_t_{*phDevices}); + new ur_context_handle_t_{phDevices, DeviceCount}); static std::once_flag InitFlag; std::call_once( @@ -78,9 +76,9 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, switch (uint32_t{propName}) { case UR_CONTEXT_INFO_NUM_DEVICES: - return ReturnValue(1); + return ReturnValue(static_cast(hContext->Devices.size())); case UR_CONTEXT_INFO_DEVICES: - return ReturnValue(hContext->getDevice()); + return ReturnValue(hContext->getDevices()); case UR_CONTEXT_INFO_REFERENCE_COUNT: return ReturnValue(hContext->getReferenceCount()); case UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: @@ -124,8 +122,10 @@ urContextRetain(ur_context_handle_t hContext) { UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { + // FIXME: this entry point has been deprecated in the SYCL RT and should be + // changed to unsupported once the deprecation period has elapsed *phNativeContext = reinterpret_cast( - hContext->getDevice()->getNativeContext()); + hContext->getDevices()[0]->getNativeContext()); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/hip/context.hpp b/source/adapters/hip/context.hpp index 8191a05408..69d4df9b6d 100644 --- a/source/adapters/hip/context.hpp +++ b/source/adapters/hip/context.hpp @@ -10,7 +10,6 @@ #pragma once #include -#include #include "common.hpp" #include "device.hpp" @@ -29,26 +28,26 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData); /// /// One of the main differences between the UR API and the HIP driver API is /// that the second modifies the state of the threads by assigning -/// `hipCtx_t` objects to threads. `hipCtx_t` objects store data associated +/// \c hipCtx_t objects to threads. \c hipCtx_t objects store data associated /// with a given device and control access to said device from the user side. 
/// UR API context are objects that are passed to functions, and not bound /// to threads. -/// The ur_context_handle_t_ object doesn't implement this behavior. It only -/// holds the HIP context data. The RAII object \ref ScopedContext implements -/// the active context behavior. /// -/// Primary vs UserDefined context +/// Since the \c ur_context_handle_t can contain multiple devices, and a \c +/// hipCtx_t refers to only a single device, the \c hipCtx_t is more tightly +/// coupled to a \c ur_device_handle_t than a \c ur_context_handle_t. In order +/// to remove some ambiguities about the different semantics of \c +/// \c ur_context_handle_t and native \c hipCtx_t, we access the native \c +/// hipCtx_t solely through the \c ur_device_handle_t class, by using the object +/// \ref ScopedContext, which sets the active device (by setting the active +/// native \c hipCtx_t). /// -/// HIP has two different types of context, the Primary context, -/// which is usable by all threads on a given process for a given device, and -/// the aforementioned custom contexts. -/// The HIP documentation, and performance analysis, suggest using the Primary -/// context whenever possible. The Primary context is also used by the HIP -/// Runtime API. For UR applications to interop with HIP Runtime API, they have -/// to use the primary context - and make that active in the thread. The -/// `ur_context_handle_t_` object can be constructed with a `kind` parameter -/// that allows to construct a Primary or `UserDefined` context, so that -/// the UR object interface is always the same. +/// Primary vs User-defined \c hipCtx_t +/// +/// HIP has two different types of \c hipCtx_t, the Primary context, which is +/// usable by all threads on a given process for a given device, and the +/// aforementioned custom \c hipCtx_t s. The HIP documentation, confirmed with +/// performance analysis, suggest using the Primary context whenever possible. 
/// /// Destructor callback /// @@ -58,6 +57,16 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData); /// See proposal for details. /// https://github.com/codeplaysoftware/standards-proposals/blob/master/extended-context-destruction/index.md /// +/// Memory Management for Devices in a Context <\b> +/// +/// A \c ur_mem_handle_t is associated with a \c ur_context_handle_t_, which +/// may refer to multiple devices. Therefore the \c ur_mem_handle_t must +/// handle a native allocation for each device in the context. UR is +/// responsible for automatically handling event dependencies for kernels +/// writing to or reading from the same \c ur_mem_handle_t and migrating memory +/// between native allocations for devices in the same \c ur_context_handle_t_ +/// if necessary. +/// struct ur_context_handle_t_ { struct deleter_data { @@ -69,15 +78,22 @@ struct ur_context_handle_t_ { using native_type = hipCtx_t; - ur_device_handle_t DeviceId; + std::vector Devices; + std::atomic_uint32_t RefCount; - ur_context_handle_t_(ur_device_handle_t DevId) - : DeviceId{DevId}, RefCount{1} { - urDeviceRetain(DeviceId); + ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices) + : Devices{Devs, Devs + NumDevices}, RefCount{1} { + for (auto &Dev : Devices) { + urDeviceRetain(Dev); + } }; - ~ur_context_handle_t_() { urDeviceRelease(DeviceId); } + ~ur_context_handle_t_() { + for (auto &Dev : Devices) { + urDeviceRelease(Dev); + } + } void invokeExtendedDeleters() { std::lock_guard Guard(Mutex); @@ -92,7 +108,9 @@ struct ur_context_handle_t_ { ExtendedDeleters.emplace_back(deleter_data{Function, UserData}); } - ur_device_handle_t getDevice() const noexcept { return DeviceId; } + const std::vector &getDevices() const noexcept { + return Devices; + } uint32_t incrementReferenceCount() noexcept { return ++RefCount; } @@ -106,104 +124,32 @@ struct ur_context_handle_t_ { ur_usm_pool_handle_t getOwningURPool(umf_memory_pool_t *UMFPool); - /// We need to keep track 
of USM mappings in AMD HIP, as certain extra - /// synchronization *is* actually required for correctness. - /// During kernel enqueue we must dispatch a prefetch for each kernel argument - /// that points to a USM mapping to ensure the mapping is correctly - /// populated on the device (https://github.com/intel/llvm/issues/7252). Thus, - /// we keep track of mappings in the context, and then check against them just - /// before the kernel is launched. The stream against which the kernel is - /// launched is not known until enqueue time, but the USM mappings can happen - /// at any time. Thus, they are tracked on the context used for the urUSM* - /// mapping. - /// - /// The three utility function are simple wrappers around a mapping from a - /// pointer to a size. - void addUSMMapping(void *Ptr, size_t Size) { - std::lock_guard Guard(Mutex); - assert(USMMappings.find(Ptr) == USMMappings.end() && - "mapping already exists"); - USMMappings[Ptr] = Size; - } - - void removeUSMMapping(const void *Ptr) { - std::lock_guard guard(Mutex); - auto It = USMMappings.find(Ptr); - if (It != USMMappings.end()) - USMMappings.erase(It); - } - - std::pair getUSMMapping(const void *Ptr) { - std::lock_guard Guard(Mutex); - auto It = USMMappings.find(Ptr); - // The simple case is the fast case... - if (It != USMMappings.end()) - return *It; - - // ... but in the failure case we have to fall back to a full scan to search - // for "offset" pointers in case the user passes in the middle of an - // allocation. We have to do some not-so-ordained-by-the-standard ordered - // comparisons of pointers here, but it'll work on all platforms we support. 
- uintptr_t PtrVal = (uintptr_t)Ptr; - for (std::pair Pair : USMMappings) { - uintptr_t BaseAddr = (uintptr_t)Pair.first; - uintptr_t EndAddr = BaseAddr + Pair.second; - if (PtrVal > BaseAddr && PtrVal < EndAddr) { - // If we've found something now, offset *must* be nonzero - assert(Pair.second); - return Pair; - } - } - return {nullptr, 0}; - } - private: std::mutex Mutex; std::vector ExtendedDeleters; - std::unordered_map USMMappings; std::set PoolHandles; }; namespace { -/// RAII type to guarantee recovering original HIP context -/// Scoped context is used across all UR HIP plugin implementation -/// to activate the UR Context on the current thread, matching the -/// HIP driver semantics where the context used for the HIP Driver -/// API is the one active on the thread. -/// The implementation tries to avoid replacing the hipCtx_t if it cans +/// Scoped context is used across all UR HIP plugin implementation to activate +/// the native Context on the current thread. The ScopedContext does not +/// reinstate the previous context as all operations in the hip adapter that +/// require an active context set the active context and don't rely on context +/// reinstatement. class ScopedContext { - hipCtx_t Original; - bool NeedToRecover; - public: - ScopedContext(ur_device_handle_t hDevice) : NeedToRecover{false} { + ScopedContext(ur_device_handle_t hDevice) { + hipCtx_t Original{}; if (!hDevice) { throw UR_RESULT_ERROR_INVALID_DEVICE; } - // FIXME when multi device context are supported in HIP adapter hipCtx_t Desired = hDevice->getNativeContext(); UR_CHECK_ERROR(hipCtxGetCurrent(&Original)); if (Original != Desired) { // Sets the desired context as the active one for the thread UR_CHECK_ERROR(hipCtxSetCurrent(Desired)); - if (Original == nullptr) { - // No context is installed on the current thread - // This is the most common case.
We can activate the context in the - // thread and leave it there until all the UR context referring to the - // same underlying HIP context are destroyed. This emulates - // the behaviour of the HIP runtime api, and avoids costly context - // switches. No action is required on this side of the if. - } else { - NeedToRecover = true; - } - } - } - - ~ScopedContext() { - if (NeedToRecover) { - UR_CHECK_ERROR(hipCtxSetCurrent(Original)); } } }; diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 5b473c050e..139906e95a 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -549,6 +549,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, SupportedExtensions += "cl_khr_fp64 "; } + SupportedExtensions += "cl_khr_fp16 "; + return ReturnValue(SupportedExtensions.c_str()); } case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: { diff --git a/source/adapters/hip/device.hpp b/source/adapters/hip/device.hpp index 83cc2ee954..bea2c46fb5 100644 --- a/source/adapters/hip/device.hpp +++ b/source/adapters/hip/device.hpp @@ -25,12 +25,13 @@ struct ur_device_handle_t_ { std::atomic_uint32_t RefCount; ur_platform_handle_t Platform; hipCtx_t HIPContext; + uint32_t DeviceIndex; public: ur_device_handle_t_(native_type HipDevice, hipCtx_t Context, - ur_platform_handle_t Platform) + ur_platform_handle_t Platform, uint32_t DeviceIndex) : HIPDevice(HipDevice), RefCount{1}, Platform(Platform), - HIPContext(Context) {} + HIPContext(Context), DeviceIndex(DeviceIndex) {} ~ur_device_handle_t_() { UR_CHECK_ERROR(hipDevicePrimaryCtxRelease(HIPDevice)); @@ -42,7 +43,11 @@ struct ur_device_handle_t_ { ur_platform_handle_t getPlatform() const noexcept { return Platform; }; - hipCtx_t getNativeContext() { return HIPContext; }; + hipCtx_t getNativeContext() const noexcept { return HIPContext; }; + + // Returns the index of the device relative to the other devices in the same + // platform + uint32_t getIndex() const noexcept { 
return DeviceIndex; }; }; int getAttribute(ur_device_handle_t Device, hipDeviceAttribute_t Attribute); diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 9f9e3f71fd..c744de6424 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -36,19 +36,18 @@ static size_t imageElementByteSize(hipArray_Format ArrayFormat) { return 0; } -ur_result_t enqueueEventsWait(ur_queue_handle_t CommandQueue, - hipStream_t Stream, uint32_t NumEventsInWaitList, +ur_result_t enqueueEventsWait(ur_queue_handle_t, hipStream_t Stream, + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList) { if (!EventWaitList) { return UR_RESULT_SUCCESS; } try { - ScopedContext Active(CommandQueue->getDevice()); - auto Result = forLatestEvents( EventWaitList, NumEventsInWaitList, [Stream](ur_event_handle_t Event) -> ur_result_t { - if (Event->getStream() == Stream) { + ScopedContext Active(Event->getDevice()); + if (Event->isCompleted() || Event->getStream() == Stream) { return UR_RESULT_SUCCESS; } else { UR_CHECK_ERROR(hipStreamWaitEvent(Stream, Event->get(), 0)); @@ -151,6 +150,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); UR_ASSERT(!(phEventWaitList != NULL && numEventsInWaitList == 0), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + UR_ASSERT(hBuffer->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); ur_result_t Result = UR_RESULT_SUCCESS; std::unique_ptr RetImplEvent{nullptr}; @@ -158,8 +158,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( try { ScopedContext Active(hQueue->getDevice()); hipStream_t HIPStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, + phEventWaitList)); if (phEvent) { RetImplEvent = @@ -168,9 +168,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite(
UR_CHECK_ERROR(RetImplEvent->start()); } - UR_CHECK_ERROR(hipMemcpyHtoDAsync( - std::get(hBuffer->Mem).getWithOffset(offset), - const_cast(pSrc), size, HIPStream)); + UR_CHECK_ERROR( + hipMemcpyHtoDAsync(std::get(hBuffer->Mem) + .getPtrWithOffset(hQueue->getDevice(), offset), + const_cast(pSrc), size, HIPStream)); if (phEvent) { UR_CHECK_ERROR(RetImplEvent->record()); @@ -197,15 +198,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); UR_ASSERT(!(phEventWaitList != NULL && numEventsInWaitList == 0), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + UR_ASSERT(hBuffer->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - ur_result_t Result = UR_RESULT_SUCCESS; std::unique_ptr RetImplEvent{nullptr}; + ur_lock MemoryMigrationLock{hBuffer->MemoryMigrationMutex}; + auto Device = hQueue->getDevice(); + hipStream_t HIPStream = hQueue->getNextTransferStream(); + try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); + // Note that this entry point may be called on a queue that may not be the + // last queue to write to the MemBuffer, meaning we must perform the copy + // from a different device + if (hBuffer->LastEventWritingToMemObj && + hBuffer->LastEventWritingToMemObj->getDevice() != hQueue->getDevice()) { + Device = hBuffer->LastEventWritingToMemObj->getDevice(); + ScopedContext Active(Device); + HIPStream = hipStream_t{0}; // Default stream for different device + // We may have to wait for an event on another queue if it is the last + // event writing to mem obj + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, 1, + &hBuffer->LastEventWritingToMemObj)); + } + + ScopedContext Active(Device); + + // Use the default stream if copying from another device + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, + phEventWaitList)); if (phEvent) {
RetImplEvent = @@ -214,9 +234,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( UR_CHECK_ERROR(RetImplEvent->start()); } + // Copying from the device with latest version of memory, not necessarily + // the device associated with the Queue UR_CHECK_ERROR(hipMemcpyDtoHAsync( - pDst, std::get(hBuffer->Mem).getWithOffset(offset), size, - HIPStream)); + pDst, + std::get(hBuffer->Mem).getPtrWithOffset(Device, offset), + size, HIPStream)); if (phEvent) { UR_CHECK_ERROR(RetImplEvent->record()); @@ -231,9 +254,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( } } catch (ur_result_t err) { - Result = err; + return err; } - return Result; + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( @@ -246,9 +269,44 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + std::vector DepEvents( + phEventWaitList, phEventWaitList + numEventsInWaitList); + std::vector> MemMigrationLocks; + + // phEventWaitList only contains events that are handed to UR by the SYCL + // runtime. 
However since UR handles memory dependencies within a context + // we may need to add more events to our dependent events list if the UR + // context contains multiple devices + if (hQueue->getContext()->Devices.size() > 1) { + MemMigrationLocks.reserve(hKernel->Args.MemObjArgs.size()); + for (auto &MemArg : hKernel->Args.MemObjArgs) { + bool PushBack = false; + if (auto MemDepEvent = MemArg.Mem->LastEventWritingToMemObj; + MemDepEvent && std::find(DepEvents.begin(), DepEvents.end(), + MemDepEvent) == DepEvents.end()) { + DepEvents.push_back(MemDepEvent); + PushBack = true; + } + if ((MemArg.AccessFlags & + (UR_MEM_FLAG_READ_WRITE | UR_MEM_FLAG_WRITE_ONLY)) || + PushBack) { + if (std::find_if(MemMigrationLocks.begin(), MemMigrationLocks.end(), + [MemArg](auto &Lock) { + return Lock.first == MemArg.Mem; + }) == MemMigrationLocks.end()) + MemMigrationLocks.emplace_back( + std::pair{MemArg.Mem, ur_lock{MemArg.Mem->MemoryMigrationMutex}}); + } + } + } + + // Early exit for zero size range kernel if (*pGlobalWorkSize == 0) { - return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList, - phEventWaitList, phEvent); + if (DepEvents.size()) { + return urEnqueueEventsWaitWithBarrier(hQueue, DepEvents.size(), + DepEvents.data(), phEvent); + } + return UR_RESULT_SUCCESS; } // Set the number of threads per block to the number of threads per warp @@ -314,7 +372,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( try { ur_device_handle_t Dev = hQueue->getDevice(); ScopedContext Active(Dev); - ur_context_handle_t Ctx = hQueue->getContext(); uint32_t StreamToken; ur_stream_quard Guard; @@ -322,16 +379,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( numEventsInWaitList, phEventWaitList, Guard, &StreamToken); hipFunction_t HIPFunc = hKernel->get(); - hipDevice_t HIPDev = Dev->get(); - for (const void *P : hKernel->getPtrArgs()) { - auto [Addr, Size] = Ctx->getUSMMapping(P); - if (!Addr) - continue; - if (hipMemPrefetchAsync(Addr, Size, HIPDev,
HIPStream) != hipSuccess) - return UR_RESULT_ERROR_INVALID_KERNEL_ARGS; + if (DepEvents.size()) { + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, DepEvents.size(), + DepEvents.data())); + } + + // For memory migration across devices in the same context + if (hQueue->getContext()->Devices.size() > 1) { + for (auto &MemArg : hKernel->Args.MemObjArgs) { + migrateMemoryToDeviceIfNeeded(MemArg.Mem, hQueue->getDevice()); + } } - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); // Set the implicit global offset parameter if kernel has offset variant if (hKernel->getWithOffsetParameter()) { @@ -358,6 +416,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( UR_CHECK_ERROR(RetImplEvent->start()); } + // Once event has been started we can unlock MemoryMigrationMutex + if (hQueue->getContext()->Devices.size() > 1) { + for (auto &MemArg : hKernel->Args.MemObjArgs) { + // Telling the ur_mem_handle_t that it will need to wait on this kernel + // if it has been written to + if (phEvent && (MemArg.AccessFlags & + (UR_MEM_FLAG_READ_WRITE | UR_MEM_FLAG_WRITE_ONLY))) { + MemArg.Mem->setLastEventWritingToMemObj(RetImplEvent.get()); + } + } + // We can release the MemoryMigrationMutexes now + MemMigrationLocks.clear(); + } + // Set local mem max size if env var is present static const char *LocalMemSzPtrUR = std::getenv("UR_HIP_MAX_LOCAL_MEM_SIZE"); @@ -371,7 +443,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( int DeviceMaxLocalMem = 0; UR_CHECK_ERROR(hipDeviceGetAttribute( &DeviceMaxLocalMem, hipDeviceAttributeMaxSharedMemoryPerBlock, - HIPDev)); + Dev->get())); static const int EnvVal = std::atoi(LocalMemSzPtr); if (EnvVal <= 0 || EnvVal > DeviceMaxLocalMem) { @@ -574,16 +646,32 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( UR_ASSERT(!(hostSlicePitch != 0 && hostSlicePitch % hostRowPitch != 0), UR_RESULT_ERROR_INVALID_SIZE); - ur_result_t Result = UR_RESULT_SUCCESS; - void *DevPtr = 
std::get(hBuffer->Mem).getVoid(); std::unique_ptr RetImplEvent{nullptr}; + ur_result_t Result = UR_RESULT_SUCCESS; + ur_lock MemoryMigrationLock(hBuffer->MemoryMigrationMutex); + auto Device = hQueue->getDevice(); + hipStream_t HIPStream = hQueue->getNextTransferStream(); + try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); + // Note that this entry point may be called on a queue that may not be the + // last queue to write to the MemBuffer, meaning we must perform the copy + // from a different device + if (hBuffer->LastEventWritingToMemObj && + hBuffer->LastEventWritingToMemObj->getDevice() != hQueue->getDevice()) { + Device = hBuffer->LastEventWritingToMemObj->getDevice(); + ScopedContext Active(Device); + HIPStream = hipStream_t{0}; // Default stream for different device + // We may have to wait for an event on another queue if it is the last + // event writing to mem obj + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, 1, + &hBuffer->LastEventWritingToMemObj)); + } - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); + ScopedContext Active(Device); + + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, + phEventWaitList)); if (phEvent) { RetImplEvent = @@ -592,10 +680,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( UR_CHECK_ERROR(RetImplEvent->start()); } - Result = commonEnqueueMemBufferCopyRect( + void *DevPtr = std::get(hBuffer->Mem).getVoid(Device); + UR_CHECK_ERROR(commonEnqueueMemBufferCopyRect( HIPStream, region, &DevPtr, hipMemoryTypeDevice, bufferOrigin, bufferRowPitch, bufferSlicePitch, pDst, hipMemoryTypeHost, hostOrigin, - hostRowPitch, hostSlicePitch); + hostRowPitch, hostSlicePitch)); if (phEvent) { UR_CHECK_ERROR(RetImplEvent->record()); @@ -623,7 +712,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, 
ur_event_handle_t *phEvent) { ur_result_t Result = UR_RESULT_SUCCESS; - void *DevPtr = std::get(hBuffer->Mem).getVoid(); + void *DevPtr = std::get(hBuffer->Mem).getVoid(hQueue->getDevice()); std::unique_ptr RetImplEvent{nullptr}; try { @@ -691,8 +780,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( UR_CHECK_ERROR(RetImplEvent->start()); } - auto Src = std::get(hBufferSrc->Mem).getWithOffset(srcOffset); - auto Dst = std::get(hBufferDst->Mem).getWithOffset(dstOffset); + auto Src = std::get(hBufferSrc->Mem) + .getPtrWithOffset(hQueue->getDevice(), srcOffset); + auto Dst = std::get(hBufferDst->Mem) + .getPtrWithOffset(hQueue->getDevice(), dstOffset); UR_CHECK_ERROR(hipMemcpyDtoDAsync(Dst, Src, size, Stream)); @@ -717,8 +808,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { ur_result_t Result = UR_RESULT_SUCCESS; - void *SrcPtr = std::get(hBufferSrc->Mem).getVoid(); - void *DstPtr = std::get(hBufferDst->Mem).getVoid(); + void *SrcPtr = + std::get(hBufferSrc->Mem).getVoid(hQueue->getDevice()); + void *DstPtr = + std::get(hBufferDst->Mem).getVoid(hQueue->getDevice()); std::unique_ptr RetImplEvent{nullptr}; try { @@ -827,7 +920,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( UR_CHECK_ERROR(RetImplEvent->start()); } - auto DstDevice = std::get(hBuffer->Mem).getWithOffset(offset); + auto DstDevice = std::get(hBuffer->Mem) + .getPtrWithOffset(hQueue->getDevice(), offset); auto N = size / patternSize; // pattern size in bytes @@ -947,21 +1041,37 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( ur_rect_offset_t origin, ur_rect_region_t region, size_t, size_t, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hImage->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); + UR_ASSERT(hImage->isImage(), 
UR_RESULT_ERROR_INVALID_MEM_OBJECT); ur_result_t Result = UR_RESULT_SUCCESS; + ur_lock MemoryMigrationLock{hImage->MemoryMigrationMutex}; + auto Device = hQueue->getDevice(); + hipStream_t HIPStream = hQueue->getNextTransferStream(); + try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); + // Note that this entry point may be called on a queue that may not be the + // last queue to write to the MemBuffer, meaning we must perform the copy + // from a different device + if (hImage->LastEventWritingToMemObj && + hImage->LastEventWritingToMemObj->getDevice() != hQueue->getDevice()) { + Device = hImage->LastEventWritingToMemObj->getDevice(); + ScopedContext Active(Device); + HIPStream = hipStream_t{0}; // Default stream for different device + // We may have to wait for an event on another queue if it is the last + // event writing to mem obj + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, 1, + &hImage->LastEventWritingToMemObj)); + } + + ScopedContext Active(Device); if (phEventWaitList) { - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, + phEventWaitList)); } - hipArray *Array = std::get(hImage->Mem).getArray(); + hipArray *Array = std::get(hImage->Mem).getArray(Device); hipArray_Format Format; size_t NumChannels; @@ -1015,8 +1125,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( ur_rect_offset_t origin, ur_rect_region_t region, size_t, size_t, void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hImage->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); + UR_ASSERT(hImage->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); ur_result_t Result = UR_RESULT_SUCCESS; @@ -1029,7 +1138,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( phEventWaitList); } - hipArray 
*Array = std::get(hImage->Mem).getArray(); + hipArray *Array = + std::get(hImage->Mem).getArray(hQueue->getDevice()); hipArray_Format Format; size_t NumChannels; @@ -1082,10 +1192,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( ur_rect_offset_t dstOrigin, ur_rect_region_t region, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hImageSrc->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(hImageDst->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); + UR_ASSERT(hImageSrc->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); + UR_ASSERT(hImageDst->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); UR_ASSERT(std::get(hImageSrc->Mem).getImageType() == std::get(hImageDst->Mem).getImageType(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); @@ -1100,12 +1208,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( phEventWaitList); } - hipArray *SrcArray = std::get(hImageSrc->Mem).getArray(); + hipArray *SrcArray = + std::get(hImageSrc->Mem).getArray(hQueue->getDevice()); hipArray_Format SrcFormat; size_t SrcNumChannels; getArrayDesc(SrcArray, SrcFormat, SrcNumChannels); - hipArray *DstArray = std::get(hImageDst->Mem).getArray(); + hipArray *DstArray = + std::get(hImageDst->Mem).getArray(hQueue->getDevice()); hipArray_Format DstFormat; size_t DstNumChannels; getArrayDesc(DstArray, DstFormat, DstNumChannels); @@ -1166,8 +1276,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_map_flags_t mapFlags, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, void **ppRetMap) { - UR_ASSERT(hBuffer->MemType == ur_mem_handle_t_::Type::Buffer, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); + UR_ASSERT(hBuffer->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); auto &BufferImpl = std::get(hBuffer->Mem); UR_ASSERT(offset + size <= BufferImpl.getSize(), 
UR_RESULT_ERROR_INVALID_SIZE); @@ -1226,8 +1335,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { ur_result_t Result = UR_RESULT_SUCCESS; - UR_ASSERT(hMem->MemType == ur_mem_handle_t_::Type::Buffer, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); + UR_ASSERT(hMem->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); UR_ASSERT(std::get(hMem->Mem).getMapPtr() != nullptr, UR_RESULT_ERROR_INVALID_MEM_OBJECT); UR_ASSERT(std::get(hMem->Mem).getMapPtr() == pMappedPtr, @@ -1367,7 +1475,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { void *HIPDevicePtr = const_cast(pMem); - ur_device_handle_t Device = hQueue->getContext()->getDevice(); + ur_device_handle_t Device = hQueue->getDevice(); // If the device does not support managed memory access, we can't set // mem_advise. diff --git a/source/adapters/hip/event.cpp b/source/adapters/hip/event.cpp index 4871335c9f..2af6c5e910 100644 --- a/source/adapters/hip/event.cpp +++ b/source/adapters/hip/event.cpp @@ -193,7 +193,7 @@ urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { try { auto Context = phEventWaitList[0]->getContext(); - ScopedContext Active(Context->getDevice()); + ScopedContext Active(phEventWaitList[0]->getDevice()); auto WaitFunc = [Context](ur_event_handle_t Event) -> ur_result_t { UR_ASSERT(Event, UR_RESULT_ERROR_INVALID_EVENT); @@ -292,7 +292,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { std::unique_ptr event_ptr{hEvent}; ur_result_t Result = UR_RESULT_ERROR_INVALID_EVENT; try { - ScopedContext Active(hEvent->getContext()->getDevice()); Result = hEvent->release(); } catch (...) 
{ Result = UR_RESULT_ERROR_OUT_OF_RESOURCES; diff --git a/source/adapters/hip/event.hpp b/source/adapters/hip/event.hpp index bfa05b59d7..ecb995dfbe 100644 --- a/source/adapters/hip/event.hpp +++ b/source/adapters/hip/event.hpp @@ -28,6 +28,8 @@ struct ur_event_handle_t_ { ur_queue_handle_t getQueue() const noexcept { return Queue; } + ur_device_handle_t getDevice() const noexcept { return Queue->getDevice(); } + hipStream_t getStream() const noexcept { return Stream; } uint32_t getComputeStreamToken() const noexcept { return StreamToken; } diff --git a/source/adapters/hip/kernel.cpp b/source/adapters/hip/kernel.cpp index 642743ddbf..ec58bafcc6 100644 --- a/source/adapters/hip/kernel.cpp +++ b/source/adapters/hip/kernel.cpp @@ -19,7 +19,7 @@ urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, std::unique_ptr RetKernel{nullptr}; try { - ScopedContext Active(hProgram->getContext()->getDevice()); + ScopedContext Active(hProgram->getDevice()); hipFunction_t HIPFunc; hipError_t KernelError = @@ -259,13 +259,14 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_pointer_properties_t *, const void *pArgValue) { - hKernel->setKernelPtrArg(argIndex, sizeof(pArgValue), pArgValue); + hKernel->setKernelArg(argIndex, sizeof(pArgValue), pArgValue); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( - ur_kernel_handle_t hKernel, uint32_t argIndex, - const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) { +UR_APIEXPORT ur_result_t UR_APICALL +urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_mem_obj_properties_t *Properties, + ur_mem_handle_t hArgValue) { // Below sets kernel arg when zero-sized buffers are handled. // In such case the corresponding memory is null. 
if (hArgValue == nullptr) { @@ -275,8 +276,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( ur_result_t Result = UR_RESULT_SUCCESS; try { - if (hArgValue->MemType == ur_mem_handle_t_::Type::Surface) { - auto array = std::get(hArgValue->Mem).getArray(); + auto Device = hKernel->getProgram()->getDevice(); + hKernel->Args.addMemObjArg(argIndex, hArgValue, Properties ? Properties->memoryAccess : static_cast<ur_mem_flags_t>(UR_MEM_FLAG_READ_WRITE)); + if (hArgValue->isImage()) { + auto array = std::get(hArgValue->Mem).getArray(Device); hipArray_Format Format; size_t NumChannels; getArrayDesc(array, Format, NumChannels); @@ -288,10 +291,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( "uint32, float, and half."); } hipSurfaceObject_t hipSurf = - std::get(hArgValue->Mem).getSurface(); + std::get(hArgValue->Mem).getSurface(Device); hKernel->setKernelArg(argIndex, sizeof(hipSurf), (void *)&hipSurf); } else { - void *HIPPtr = std::get(hArgValue->Mem).getVoid(); + void *HIPPtr = std::get(hArgValue->Mem).getVoid(Device); hKernel->setKernelArg(argIndex, sizeof(void *), (void *)&HIPPtr); } } catch (ur_result_t Err) { diff --git a/source/adapters/hip/kernel.hpp b/source/adapters/hip/kernel.hpp index 3db9dce19a..83693a3d41 100644 --- a/source/adapters/hip/kernel.hpp +++ b/source/adapters/hip/kernel.hpp @@ -14,7 +14,6 @@ #include #include #include -#include #include "program.hpp" @@ -58,7 +57,14 @@ struct ur_kernel_handle_t_ { args_size_t ParamSizes; args_index_t Indices; args_size_t OffsetPerIndex; - std::set PtrArgs; + // A struct to keep track of memargs so that we can do dependency analysis + // at urEnqueueKernelLaunch + struct mem_obj_arg { + ur_mem_handle_t_ *Mem; + int Index; + ur_mem_flags_t AccessFlags; + }; + std::vector MemObjArgs; std::uint32_t ImplicitOffsetArgs[3] = {0, 0, 0}; @@ -112,6 +118,20 @@ struct ur_kernel_handle_t_ { Size + AlignedLocalOffset - LocalOffset); } + void addMemObjArg(int Index, ur_mem_handle_t hMem, ur_mem_flags_t Flags) { + assert(hMem && "Invalid mem handle"); + // To avoid
redundancy we are not storing mem obj with index i at index + // i in the vec of MemObjArgs. + for (auto &Arg : MemObjArgs) { + if (Arg.Index == Index) { + // Overwrite the mem obj with the same index + Arg = arguments::mem_obj_arg{hMem, Index, Flags}; + return; + } + } + MemObjArgs.push_back(arguments::mem_obj_arg{hMem, Index, Flags}); + } + void setImplicitOffset(size_t Size, std::uint32_t *ImplicitOffset) { assert(Size == sizeof(std::uint32_t) * 3); std::memcpy(ImplicitOffsetArgs, ImplicitOffset, Size); @@ -169,29 +189,16 @@ struct ur_kernel_handle_t_ { const char *getName() const noexcept { return Name.c_str(); } - /// Get the number of kernel arguments, excluding the implicit global offset. - /// Note this only returns the current known number of arguments, not the - /// real one required by the kernel, since this cannot be queried from - /// the HIP Driver API + /// Get the number of kernel arguments, excluding the implicit global + /// offset. Note this only returns the current known number of arguments, + /// not the real one required by the kernel, since this cannot be queried + /// from the HIP Driver API uint32_t getNumArgs() const noexcept { return Args.Indices.size() - 1; } void setKernelArg(int Index, size_t Size, const void *Arg) { Args.addArg(Index, Size, Arg); } - /// We track all pointer arguments to be able to issue prefetches at enqueue - /// time - void setKernelPtrArg(int Index, size_t Size, const void *PtrArg) { - Args.PtrArgs.insert(*static_cast(PtrArg)); - setKernelArg(Index, Size, PtrArg); - } - - bool isPtrArg(const void *ptr) { - return Args.PtrArgs.find(ptr) != Args.PtrArgs.end(); - } - - std::set &getPtrArgs() { return Args.PtrArgs; } - void setKernelLocalArg(int Index, size_t Size) { Args.addLocalArg(Index, Size); } diff --git a/source/adapters/hip/memory.cpp b/source/adapters/hip/memory.cpp index 3083d47744..68ded26263 100644 --- a/source/adapters/hip/memory.cpp +++ b/source/adapters/hip/memory.cpp @@ -55,28 +55,7 @@ UR_APIEXPORT 
ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { return UR_RESULT_SUCCESS; } - ScopedContext Active(uniqueMemObj->getContext()->getDevice()); - - if (hMem->MemType == ur_mem_handle_t_::Type::Buffer) { - auto &hBuffer = std::get(uniqueMemObj->Mem); - switch (hBuffer.MemAllocMode) { - case BufferMem::AllocMode::CopyIn: - case BufferMem::AllocMode::Classic: - UR_CHECK_ERROR(hipFree((void *)hBuffer.Ptr)); - break; - case BufferMem::AllocMode::UseHostPtr: - UR_CHECK_ERROR(hipHostUnregister(hBuffer.HostPtr)); - break; - case BufferMem::AllocMode::AllocHostPtr: - UR_CHECK_ERROR(hipFreeHost(hBuffer.HostPtr)); - }; - } - - else if (hMem->MemType == ur_mem_handle_t_::Type::Surface) { - auto &hImage = std::get(uniqueMemObj->Mem); - UR_CHECK_ERROR(hipDestroySurfaceObject(hImage.getSurface())); - UR_CHECK_ERROR(hipFreeArray(hImage.getArray())); - } + UR_CHECK_ERROR(hMem->clear()); } catch (ur_result_t Err) { Result = Err; @@ -123,49 +102,41 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( ur_mem_handle_t RetMemObj = nullptr; try { - ScopedContext Active(hContext->getDevice()); - void *Ptr; - auto pHost = pProperties ? pProperties->pHost : nullptr; + auto HostPtr = pProperties ? 
pProperties->pHost : nullptr; BufferMem::AllocMode AllocMode = BufferMem::AllocMode::Classic; - if ((flags & UR_MEM_FLAG_USE_HOST_POINTER) && EnableUseHostPtr) { - UR_CHECK_ERROR(hipHostRegister(pHost, size, hipHostRegisterMapped)); - UR_CHECK_ERROR(hipHostGetDevicePointer(&Ptr, pHost, 0)); AllocMode = BufferMem::AllocMode::UseHostPtr; } else if (flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) { - UR_CHECK_ERROR(hipHostMalloc(&pHost, size)); - UR_CHECK_ERROR(hipHostGetDevicePointer(&Ptr, pHost, 0)); + UR_CHECK_ERROR(hipHostMalloc(&HostPtr, size)); AllocMode = BufferMem::AllocMode::AllocHostPtr; - } else { - UR_CHECK_ERROR(hipMalloc(&Ptr, size)); - if (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) { - AllocMode = BufferMem::AllocMode::CopyIn; - } + } else if (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) { + AllocMode = BufferMem::AllocMode::CopyIn; } - if (Result == UR_RESULT_SUCCESS) { - ur_mem_handle_t parentBuffer = nullptr; - - auto DevPtr = reinterpret_cast(Ptr); - auto URMemObj = std::unique_ptr(new ur_mem_handle_t_{ - hContext, parentBuffer, flags, AllocMode, DevPtr, pHost, size}); - if (URMemObj != nullptr) { - RetMemObj = URMemObj.release(); - if (PerformInitialCopy) { - // Operates on the default stream of the current HIP context. - UR_CHECK_ERROR(hipMemcpyHtoD(DevPtr, pHost, size)); - // Synchronize with default stream implicitly used by hipMemcpyHtoD - // to make buffer data available on device before any other UR call - // uses it. 
- if (Result == UR_RESULT_SUCCESS) { - hipStream_t defaultStream = 0; - UR_CHECK_ERROR(hipStreamSynchronize(defaultStream)); - } - } - } else { - Result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + auto URMemObj = std::unique_ptr( + new ur_mem_handle_t_{hContext, flags, AllocMode, HostPtr, size}); + if (URMemObj == nullptr) { + throw UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + // First allocation will be made at urMemBufferCreate if context only + // has one device + if (PerformInitialCopy && HostPtr) { + // Perform initial copy to every device in context + for (auto &Device : hContext->getDevices()) { + ScopedContext Active(Device); + // getPtr may allocate mem if not already allocated + const auto &Ptr = std::get(URMemObj->Mem).getPtr(Device); + UR_CHECK_ERROR(hipMemcpyHtoD(Ptr, HostPtr, size)); + // TODO check if we can remove this + // Synchronize with default stream implicitly used by cuMemcpyHtoD + // to make buffer data available on device before any other UR + // call uses it. + // hipStream_t defaultStream = 0; + // UR_CHECK_ERROR(hipStreamSynchronize(defaultStream)); } } + RetMemObj = URMemObj.release(); } catch (ur_result_t Err) { Result = Err; } catch (...) { @@ -215,27 +186,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( auto &BufferImpl = std::get(hBuffer->Mem); UR_ASSERT(((pRegion->origin + pRegion->size) <= BufferImpl.getSize()), UR_RESULT_ERROR_INVALID_BUFFER_SIZE); - // Retained indirectly due to retaining parent buffer below. 
- ur_context_handle_t Context = hBuffer->Context; - BufferMem::AllocMode AllocMode = BufferMem::AllocMode::Classic; - - UR_ASSERT(BufferImpl.Ptr != BufferMem::native_type{0}, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - BufferMem::native_type Ptr = BufferImpl.getWithOffset(pRegion->origin); - - void *HostPtr = nullptr; - if (BufferImpl.HostPtr) { - HostPtr = static_cast(BufferImpl.HostPtr) + pRegion->origin; + for (auto Device : hBuffer->Context->getDevices()) { + BufferImpl.getPtr(Device); // This is allocating a dev ptr behind the scenes + // which is necessary before SubBuffer partition } ReleaseGuard ReleaseGuard(hBuffer); std::unique_ptr RetMemObj{nullptr}; try { - ScopedContext Active(Context->getDevice()); - - RetMemObj = std::unique_ptr{new ur_mem_handle_t_{ - Context, hBuffer, flags, AllocMode, Ptr, HostPtr, pRegion->size}}; + RetMemObj = std::unique_ptr{ + new ur_mem_handle_t_{hBuffer, pRegion->origin}}; } catch (ur_result_t Err) { *phMem = nullptr; return Err; @@ -258,23 +219,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, UR_ASSERT(MemInfoType <= UR_MEM_INFO_CONTEXT, UR_RESULT_ERROR_INVALID_ENUMERATION); - UrReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet); + // FIXME: Only getting info for the first device in the context. 
This + // should be fine in general + auto Device = hMemory->getContext()->getDevices()[0]; + ScopedContext Active(Device); - ScopedContext Active(hMemory->getContext()->getDevice()); + UrReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet); switch (MemInfoType) { case UR_MEM_INFO_SIZE: { try { - const auto MemVisitor = [](auto &&Mem) -> size_t { + const auto MemVisitor = [Device](auto &&Mem) -> size_t { using T = std::decay_t; if constexpr (std::is_same_v) { size_t AllocSize = 0; hipDeviceptr_t BasePtr = nullptr; - UR_CHECK_ERROR(hipMemGetAddressRange(&BasePtr, &AllocSize, Mem.Ptr)); + UR_CHECK_ERROR( + hipMemGetAddressRange(&BasePtr, &AllocSize, Mem.getPtr(Device))); return AllocSize; } else if constexpr (std::is_same_v) { +#if HIP_VERSION < 50600000 + throw UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +#else HIP_ARRAY3D_DESCRIPTOR ArrayDescriptor; - UR_CHECK_ERROR(hipArray3DGetDescriptor(&ArrayDescriptor, Mem.Array)); + UR_CHECK_ERROR( + hipArray3DGetDescriptor(&ArrayDescriptor, Mem.getArray(Device))); const auto PixelSizeBytes = GetHipFormatPixelSize(ArrayDescriptor.Format) * ArrayDescriptor.NumChannels; @@ -284,6 +253,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, (ArrayDescriptor.Height ? ArrayDescriptor.Height : 1) * (ArrayDescriptor.Depth ? ArrayDescriptor.Depth : 1); return ImageSizeBytes; +#endif } else { static_assert(ur_always_false_t, "Not exhaustive visitor!"); } @@ -312,30 +282,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, /// \param[out] phNativeMem Set to the native handle of the UR mem object. /// /// \return UR_RESULT_SUCCESS -UR_APIEXPORT ur_result_t UR_APICALL -urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) { -#if defined(__HIP_PLATFORM_NVIDIA__) - if (sizeof(BufferMem::native_type) > sizeof(ur_native_handle_t)) { - // Check that all the upper bits that cannot be represented by - // ur_native_handle_t are empty. 
- // NOTE: The following shift might trigger a warning, but the check in the - // if above makes sure that this does not underflow. - BufferMem::native_type UpperBits = std::get(hMem->Mem).get() >> - (sizeof(ur_native_handle_t) * CHAR_BIT); - if (UpperBits) { - // Return an error if any of the remaining bits is non-zero. - return UR_RESULT_ERROR_INVALID_MEM_OBJECT; - } - } - *phNativeMem = reinterpret_cast( - std::get(hMem->Mem).get()); -#elif defined(__HIP_PLATFORM_AMD__) - *phNativeMem = reinterpret_cast( - std::get(hMem->Mem).get()); -#else -#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); -#endif - return UR_RESULT_SUCCESS; +UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle(ur_mem_handle_t, + ur_native_handle_t *) { + // FIXME: there is no good way of doing this with a multi device context. + // If we return a single pointer, how would we know which device's allocation + // it should be? + // If we return a vector of pointers, this is OK for read only access but if + // we write to a buffer, how would we know which one had been written to? + // Should unused allocations be updated afterwards? We have no way of knowing + // any of these things in the current API design. + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( @@ -351,7 +307,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -/// \TODO Not implemented UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, @@ -384,145 +339,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); } - ur_result_t Result = UR_RESULT_SUCCESS; - // We only support RBGA channel order // TODO: check SYCL CTS and spec. 
May also have to support BGRA UR_ASSERT(pImageFormat->channelOrder == UR_IMAGE_CHANNEL_ORDER_RGBA, UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION); - // We have to use hipArray3DCreate, which has some caveats. The height and - // depth parameters must be set to 0 produce 1D or 2D arrays. image_desc gives - // a minimum value of 1, so we need to convert the answer. - HIP_ARRAY3D_DESCRIPTOR ArrayDesc; - ArrayDesc.NumChannels = 4; // Only support 4 channel image - ArrayDesc.Flags = 0; // No flags required - ArrayDesc.Width = pImageDesc->width; - if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { - ArrayDesc.Height = 0; - ArrayDesc.Depth = 0; - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { - ArrayDesc.Height = pImageDesc->height; - ArrayDesc.Depth = 0; - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { - ArrayDesc.Height = pImageDesc->height; - ArrayDesc.Depth = pImageDesc->depth; - } + auto URMemObj = std::unique_ptr( + new ur_mem_handle_t_{hContext, flags, *pImageFormat, *pImageDesc, pHost}); - // We need to get this now in bytes for calculating the total image size later - size_t PixelTypeSizeBytes; - - switch (pImageFormat->channelType) { - - case UR_IMAGE_CHANNEL_TYPE_UNORM_INT8: - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: - ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT8; - PixelTypeSizeBytes = 1; - break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8: - ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT8; - PixelTypeSizeBytes = 1; - break; - case UR_IMAGE_CHANNEL_TYPE_UNORM_INT16: - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: - ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT16; - PixelTypeSizeBytes = 2; - break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16: - ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT16; - PixelTypeSizeBytes = 2; - break; - case UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT: - ArrayDesc.Format = HIP_AD_FORMAT_HALF; - PixelTypeSizeBytes = 2; - break; - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: - ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT32; - PixelTypeSizeBytes = 4; - 
break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32: - ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT32; - PixelTypeSizeBytes = 4; - break; - case UR_IMAGE_CHANNEL_TYPE_FLOAT: - ArrayDesc.Format = HIP_AD_FORMAT_FLOAT; - PixelTypeSizeBytes = 4; - break; - default: - // urMemImageCreate given unsupported image_channel_data_type - return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; + if (URMemObj == nullptr) { + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - // When a dimension isn't used image_desc has the size set to 1 - size_t PixelSizeBytes = - PixelTypeSizeBytes * 4; // 4 is the only number of channels we support - size_t ImageSizeBytes = PixelSizeBytes * pImageDesc->width * - pImageDesc->height * pImageDesc->depth; - - ScopedContext Active(hContext->getDevice()); - hipArray *ImageArray; - UR_CHECK_ERROR(hipArray3DCreate(reinterpret_cast(&ImageArray), - &ArrayDesc)); - - try { - if (PerformInitialCopy) { - // We have to use a different copy function for each image dimensionality - if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { - UR_CHECK_ERROR(hipMemcpyHtoA(ImageArray, 0, pHost, ImageSizeBytes)); - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { - hip_Memcpy2D CpyDesc; - memset(&CpyDesc, 0, sizeof(CpyDesc)); - CpyDesc.srcMemoryType = hipMemoryType::hipMemoryTypeHost; - CpyDesc.srcHost = pHost; - CpyDesc.dstMemoryType = hipMemoryType::hipMemoryTypeArray; - CpyDesc.dstArray = reinterpret_cast(ImageArray); - CpyDesc.WidthInBytes = PixelSizeBytes * pImageDesc->width; - CpyDesc.Height = pImageDesc->height; - UR_CHECK_ERROR(hipMemcpyParam2D(&CpyDesc)); - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { - HIP_MEMCPY3D CpyDesc; - memset(&CpyDesc, 0, sizeof(CpyDesc)); - CpyDesc.srcMemoryType = hipMemoryType::hipMemoryTypeHost; - CpyDesc.srcHost = pHost; - CpyDesc.dstMemoryType = hipMemoryType::hipMemoryTypeArray; - CpyDesc.dstArray = reinterpret_cast(ImageArray); - CpyDesc.WidthInBytes = PixelSizeBytes * pImageDesc->width; - CpyDesc.Height = pImageDesc->height; - 
CpyDesc.Depth = pImageDesc->depth; - UR_CHECK_ERROR(hipDrvMemcpy3D(&CpyDesc)); - } - } - - // HIP_RESOURCE_DESC is a union of different structs, shown here - // We need to fill it as described here to use it for a surface or texture - // HIP_RESOURCE_DESC::resType must be HIP_RESOURCE_TYPE_ARRAY and - // HIP_RESOURCE_DESC::res::array::hArray must be set to a valid HIP array - // handle. - // HIP_RESOURCE_DESC::flags must be set to zero - - hipResourceDesc ImageResDesc; - ImageResDesc.res.array.array = ImageArray; - ImageResDesc.resType = hipResourceTypeArray; - - hipSurfaceObject_t Surface; - UR_CHECK_ERROR(hipCreateSurfaceObject(&Surface, &ImageResDesc)); - - auto URMemObj = std::unique_ptr(new ur_mem_handle_t_{ - hContext, ImageArray, Surface, flags, pImageDesc->type, pHost}); - - if (URMemObj == nullptr) { - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + if (PerformInitialCopy) { + for (const auto &Dev : hContext->getDevices()) { + UR_CHECK_ERROR(migrateMemoryToDeviceIfNeeded(URMemObj.get(), Dev)); } - - *phMem = URMemObj.release(); - } catch (ur_result_t Err) { - UR_CHECK_ERROR(hipFreeArray(ImageArray)); - return Err; - } catch (...) 
{ - UR_CHECK_ERROR(hipFreeArray(ImageArray)); - return UR_RESULT_ERROR_UNKNOWN; } - return Result; + *phMem = URMemObj.release(); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, @@ -531,14 +366,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, void *pPropValue, size_t *pPropSizeRet) { UR_ASSERT(hMemory->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - ScopedContext Active(hMemory->getContext()->getDevice()); + // FIXME: only getting infor for first image in ctx + auto Device = hMemory->getContext()->getDevices()[0]; + ScopedContext Active(Device); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); try { - HIP_ARRAY3D_DESCRIPTOR ArrayInfo; +#if HIP_VERSION >= 50600000 UR_CHECK_ERROR(hipArray3DGetDescriptor( - &ArrayInfo, std::get(hMemory->Mem).Array)); + &ArrayInfo, std::get(hMemory->Mem).getArray(Device))); +#else + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +#endif const auto hip2urFormat = [](hipArray_Format HipFormat) -> ur_image_channel_type_t { @@ -617,3 +457,174 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { hMem->incrementReferenceCount(); return UR_RESULT_SUCCESS; } + +inline ur_result_t +allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem, + const ur_device_handle_t hDevice) { + ScopedContext Active(hDevice); + ur_lock LockGuard(Mem->MemoryAllocationMutex); + + if (Mem->isBuffer()) { + auto &Buffer = std::get(Mem->Mem); + hipDeviceptr_t &DevPtr = Buffer.Ptrs[hDevice->getIndex()]; + + // Allocation has already been made + if (DevPtr != BufferMem::native_type{0}) { + return UR_RESULT_SUCCESS; + } + + if (Buffer.MemAllocMode == BufferMem::AllocMode::AllocHostPtr) { + // Host allocation has already been made + UR_CHECK_ERROR(hipHostGetDevicePointer(&DevPtr, Buffer.HostPtr, 0)); + } else if (Buffer.MemAllocMode == BufferMem::AllocMode::UseHostPtr) { + UR_CHECK_ERROR( + hipHostRegister(Buffer.HostPtr, Buffer.Size, 
hipHostRegisterMapped)); + UR_CHECK_ERROR(hipHostGetDevicePointer(&DevPtr, Buffer.HostPtr, 0)); + } else { + UR_CHECK_ERROR(hipMalloc(&DevPtr, Buffer.Size)); + } + } else { + hipArray *ImageArray; + hipSurfaceObject_t Surface; + try { + auto &Image = std::get(Mem->Mem); + // Allocation has already been made + if (Image.Arrays[hDevice->getIndex()]) { + return UR_RESULT_SUCCESS; + } + UR_CHECK_ERROR(hipArray3DCreate( + reinterpret_cast(&ImageArray), &Image.ArrayDesc)); + Image.Arrays[hDevice->getIndex()] = ImageArray; + // HIP_RESOURCE_DESC is a union of different structs, shown here + // We need to fill it as described here to use it for a surface or texture + // HIP_RESOURCE_DESC::resType must be HIP_RESOURCE_TYPE_ARRAY and + // HIP_RESOURCE_DESC::res::array::hArray must be set to a valid HIP array + // handle. + // HIP_RESOURCE_DESC::flags must be set to zero + hipResourceDesc ImageResDesc; + ImageResDesc.res.array.array = ImageArray; + ImageResDesc.resType = hipResourceTypeArray; + + UR_CHECK_ERROR(hipCreateSurfaceObject(&Surface, &ImageResDesc)); + Image.SurfObjs[hDevice->getIndex()] = Surface; + } catch (ur_result_t Err) { + if (ImageArray) { + UR_CHECK_ERROR(hipFreeArray(ImageArray)); + } + return Err; + } catch (...) 
{ + if (ImageArray) { + UR_CHECK_ERROR(hipFreeArray(ImageArray)); + } + return UR_RESULT_ERROR_UNKNOWN; + } + } + return UR_RESULT_SUCCESS; +} + +namespace { +inline ur_result_t migrateBufferToDevice(ur_mem_handle_t Mem, + ur_device_handle_t hDevice) { + auto &Buffer = std::get(Mem->Mem); + if (Mem->LastEventWritingToMemObj == nullptr) { + // Device allocation being initialized from host for the first time + if (Buffer.HostPtr) { + UR_CHECK_ERROR( + hipMemcpyHtoD(Buffer.getPtr(hDevice), Buffer.HostPtr, Buffer.Size)); + } + } else if (Mem->LastEventWritingToMemObj->getDevice() != hDevice) { + UR_CHECK_ERROR( + hipMemcpyDtoD(Buffer.getPtr(hDevice), + Buffer.getPtr(Mem->LastEventWritingToMemObj->getDevice()), + Buffer.Size)); + } + return UR_RESULT_SUCCESS; +} + +inline ur_result_t migrateImageToDevice(ur_mem_handle_t Mem, + ur_device_handle_t hDevice) { + auto &Image = std::get(Mem->Mem); + // When a dimension isn't used image_desc has the size set to 1 + size_t PixelSizeBytes = Image.PixelTypeSizeBytes * + 4; // 4 is the only number of channels we support + size_t ImageSizeBytes = PixelSizeBytes * Image.ImageDesc.width * + Image.ImageDesc.height * Image.ImageDesc.depth; + + hipArray *ImageArray = Image.getArray(hDevice); + + hip_Memcpy2D CpyDesc2D; + HIP_MEMCPY3D CpyDesc3D; + // We have to use a different copy function for each image + // dimensionality + if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE2D) { + memset(&CpyDesc2D, 0, sizeof(CpyDesc2D)); + CpyDesc2D.srcMemoryType = hipMemoryType::hipMemoryTypeHost; + CpyDesc2D.dstMemoryType = hipMemoryType::hipMemoryTypeArray; + CpyDesc2D.dstArray = reinterpret_cast(ImageArray); + CpyDesc2D.WidthInBytes = PixelSizeBytes * Image.ImageDesc.width; + CpyDesc2D.Height = Image.ImageDesc.height; + } else if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE3D) { + memset(&CpyDesc3D, 0, sizeof(CpyDesc3D)); + CpyDesc3D.srcMemoryType = hipMemoryType::hipMemoryTypeHost; + CpyDesc3D.dstMemoryType = hipMemoryType::hipMemoryTypeArray; + 
CpyDesc3D.dstArray = reinterpret_cast(ImageArray); + CpyDesc3D.WidthInBytes = PixelSizeBytes * Image.ImageDesc.width; + CpyDesc3D.Height = Image.ImageDesc.height; + CpyDesc3D.Depth = Image.ImageDesc.depth; + } + + if (Mem->LastEventWritingToMemObj == nullptr) { + if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE1D) { + UR_CHECK_ERROR( + hipMemcpyHtoA(ImageArray, 0, Image.HostPtr, ImageSizeBytes)); + } else if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE2D) { + CpyDesc2D.srcHost = Image.HostPtr; + UR_CHECK_ERROR(hipMemcpyParam2D(&CpyDesc2D)); + } else if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE3D) { + CpyDesc3D.srcHost = Image.HostPtr; + UR_CHECK_ERROR(hipDrvMemcpy3D(&CpyDesc3D)); + } + } else if (Mem->LastEventWritingToMemObj->getDevice() != hDevice) { + if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE1D) { + // FIXME: 1D memcpy from DtoD going through the host. + UR_CHECK_ERROR(hipMemcpyAtoH( + Image.HostPtr, + Image.getArray(Mem->LastEventWritingToMemObj->getDevice()), + 0 /*srcOffset*/, ImageSizeBytes)); + UR_CHECK_ERROR( + hipMemcpyHtoA(ImageArray, 0, Image.HostPtr, ImageSizeBytes)); + } else if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE2D) { + CpyDesc2D.srcArray = + Image.getArray(Mem->LastEventWritingToMemObj->getDevice()); + UR_CHECK_ERROR(hipMemcpyParam2D(&CpyDesc2D)); + } else if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE3D) { + CpyDesc3D.srcArray = + Image.getArray(Mem->LastEventWritingToMemObj->getDevice()); + UR_CHECK_ERROR(hipDrvMemcpy3D(&CpyDesc3D)); + } + } + return UR_RESULT_SUCCESS; +} +} // namespace + +// If calling this entry point it is necessary to lock the memoryMigrationMutex +// beforehand +ur_result_t migrateMemoryToDeviceIfNeeded(ur_mem_handle_t Mem, + const ur_device_handle_t hDevice) { + UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + // Device allocation has already been initialized with most up to date + // data in buffer + if (Mem->HaveMigratedToDeviceSinceLastWrite[hDevice->getIndex()]) { + return UR_RESULT_SUCCESS; + } + + 
ScopedContext Active(hDevice); + if (Mem->isBuffer()) { + UR_CHECK_ERROR(migrateBufferToDevice(Mem, hDevice)); + } else { + UR_CHECK_ERROR(migrateImageToDevice(Mem, hDevice)); + } + + Mem->HaveMigratedToDeviceSinceLastWrite[hDevice->getIndex()] = true; + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/hip/memory.hpp b/source/adapters/hip/memory.hpp index 2732b22a6e..d36b9ee001 100644 --- a/source/adapters/hip/memory.hpp +++ b/source/adapters/hip/memory.hpp @@ -10,18 +10,25 @@ #pragma once #include "common.hpp" +#include "context.hpp" +#include "event.hpp" #include #include +ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t, + const ur_device_handle_t); +ur_result_t migrateMemoryToDeviceIfNeeded(ur_mem_handle_t, + const ur_device_handle_t); + // Handler for plain, pointer-based HIP allocations struct BufferMem { using native_type = hipDeviceptr_t; // If this allocation is a sub-buffer (i.e., a view on an existing // allocation), this is the pointer to the parent handler structure - ur_mem_handle_t Parent; - // HIP handler for the pointer - native_type Ptr; + ur_mem_handle_t Parent = nullptr; + // Outer mem holding this struct in variant + ur_mem_handle_t OuterMemStruct; /// Pointer associated with this device on the host void *HostPtr; @@ -50,20 +57,44 @@ struct BufferMem { AllocHostPtr } MemAllocMode; - BufferMem(ur_mem_handle_t Parent, AllocMode Mode, hipDeviceptr_t Ptr, - void *HostPtr, size_t Size) - : Parent{Parent}, Ptr{Ptr}, HostPtr{HostPtr}, Size{Size}, MapSize{0}, - MapOffset{0}, MapPtr{nullptr}, MapFlags{UR_MAP_FLAG_WRITE}, - MemAllocMode{Mode} {}; +private: + // Vector of HIP pointers + std::vector Ptrs; + +public: + BufferMem(ur_context_handle_t Context, ur_mem_handle_t OuterMemStruct, + AllocMode Mode, void *HostPtr, size_t Size) + : OuterMemStruct{OuterMemStruct}, HostPtr{HostPtr}, Size{Size}, + MapSize{0}, MapOffset{0}, MapPtr{nullptr}, MapFlags{UR_MAP_FLAG_WRITE}, + MemAllocMode{Mode}, Ptrs(Context->Devices.size(), 
native_type{0}){}; + + BufferMem(const BufferMem &Buffer) = default; - native_type get() const noexcept { return Ptr; } + // This will allocate memory on device if there isn't already an active + // allocation on the device + native_type getPtr(const ur_device_handle_t Device) { + return getPtrWithOffset(Device, 0); + } + + // This will allocate memory on device with index Index if there isn't already + // an active allocation on the device + native_type getPtrWithOffset(const ur_device_handle_t Device, size_t Offset) { + if (ur_result_t Err = + allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device); + Err != UR_RESULT_SUCCESS) { + throw Err; + } + return reinterpret_cast( + reinterpret_cast(Ptrs[Device->getIndex()]) + Offset); + } - native_type getWithOffset(size_t Offset) const noexcept { - return reinterpret_cast(reinterpret_cast(Ptr) + - Offset); + // This will allocate memory on device if there isn't already an active + // allocation on the device + void *getVoid(const ur_device_handle_t Device) { + return reinterpret_cast(getPtrWithOffset(Device, 0)); } - void *getVoid() const noexcept { return reinterpret_cast(Ptr); } + bool isSubBuffer() const noexcept { return Parent != nullptr; } size_t getSize() const noexcept { return Size; } @@ -107,28 +138,240 @@ struct BufferMem { assert(MapPtr != nullptr); return MapFlags; } + + ur_result_t clear() { + if (Parent != nullptr) { + return UR_RESULT_SUCCESS; + } + + switch (MemAllocMode) { + case AllocMode::CopyIn: + case AllocMode::Classic: + for (auto &DevPtr : Ptrs) { + if (DevPtr != native_type{0}) { + UR_CHECK_ERROR(hipFree(DevPtr)); + } + } + break; + case AllocMode::UseHostPtr: + UR_CHECK_ERROR(hipHostUnregister(HostPtr)); + break; + case AllocMode::AllocHostPtr: + UR_CHECK_ERROR(hipFreeHost(HostPtr)); + } + return UR_RESULT_SUCCESS; + } + + friend struct ur_mem_handle_t_; + friend ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t, + const ur_device_handle_t); }; // Handler data for surface object (i.e. 
Images) struct SurfaceMem { - hipArray *Array; - hipSurfaceObject_t SurfObj; - ur_mem_type_t ImageType; +private: + std::vector Arrays; + std::vector SurfObjs; + +public: + ur_mem_handle_t OuterMemStruct; + + ur_image_format_t ImageFormat; + ur_image_desc_t ImageDesc; + HIP_ARRAY3D_DESCRIPTOR ArrayDesc; + size_t PixelTypeSizeBytes; + void *HostPtr; + + SurfaceMem(ur_context_handle_t Context, ur_mem_handle_t OuterMemStruct, + ur_image_format_t ImageFormat, ur_image_desc_t ImageDesc, + void *HostPtr) + : Arrays(Context->Devices.size(), nullptr), + SurfObjs(Context->Devices.size(), nullptr), + OuterMemStruct{OuterMemStruct}, + ImageFormat{ImageFormat}, ImageDesc{ImageDesc}, HostPtr{HostPtr} { + // We have to use hipArray3DCreate, which has some caveats. The height and + // depth parameters must be set to 0 produce 1D or 2D arrays. image_desc + // gives a minimum value of 1, so we need to convert the answer. + ArrayDesc.NumChannels = 4; // Only support 4 channel image + ArrayDesc.Flags = 0; // No flags required + ArrayDesc.Width = ImageDesc.width; + if (ImageDesc.type == UR_MEM_TYPE_IMAGE1D) { + ArrayDesc.Height = 0; + ArrayDesc.Depth = 0; + } else if (ImageDesc.type == UR_MEM_TYPE_IMAGE2D) { + ArrayDesc.Height = ImageDesc.height; + ArrayDesc.Depth = 0; + } else if (ImageDesc.type == UR_MEM_TYPE_IMAGE3D) { + ArrayDesc.Height = ImageDesc.height; + ArrayDesc.Depth = ImageDesc.depth; + } + + // We need to get PixelTypeSizeBytes for calculating the total image size + // later + switch (ImageFormat.channelType) { + + case UR_IMAGE_CHANNEL_TYPE_UNORM_INT8: + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: + ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT8; + PixelTypeSizeBytes = 1; + break; + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8: + ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT8; + PixelTypeSizeBytes = 1; + break; + case UR_IMAGE_CHANNEL_TYPE_UNORM_INT16: + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: + ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT16; + PixelTypeSizeBytes = 2; + break; 
+ case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16: + ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT16; + PixelTypeSizeBytes = 2; + break; + case UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT: + ArrayDesc.Format = HIP_AD_FORMAT_HALF; + PixelTypeSizeBytes = 2; + break; + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: + ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT32; + PixelTypeSizeBytes = 4; + break; + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32: + ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT32; + PixelTypeSizeBytes = 4; + break; + case UR_IMAGE_CHANNEL_TYPE_FLOAT: + ArrayDesc.Format = HIP_AD_FORMAT_FLOAT; + PixelTypeSizeBytes = 4; + break; + default: + // urMemImageCreate given unsupported image_channel_data_type + detail::ur::die("Bad image format given to ur_image_ constructor"); + } + } + + // Will allocate a new array on device if not already allocated + hipArray *getArray(const ur_device_handle_t Device) { + if (ur_result_t Err = + allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device); + Err != UR_RESULT_SUCCESS) { + throw Err; + } + return Arrays[Device->getIndex()]; + } - SurfaceMem(hipArray *Array, hipSurfaceObject_t Surf, ur_mem_type_t ImageType) - : Array{Array}, SurfObj{Surf}, ImageType{ImageType} {}; + // Will allocate a new surface on device if not already allocated + hipSurfaceObject_t getSurface(const ur_device_handle_t Device) { + if (ur_result_t Err = + allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device); + Err != UR_RESULT_SUCCESS) { + throw Err; + } + return SurfObjs[Device->getIndex()]; + } - hipArray *getArray() const noexcept { return Array; } + ur_mem_type_t getImageType() const noexcept { return ImageDesc.type; } - hipSurfaceObject_t getSurface() const noexcept { return SurfObj; } + ur_result_t clear() { + for (auto Array : Arrays) { + if (Array) { + UR_CHECK_ERROR(hipFreeArray(Array)); + } + } + for (auto Surf : SurfObjs) { + if (Surf != hipSurfaceObject_t{0}) { + UR_CHECK_ERROR(hipDestroySurfaceObject(Surf)); + } + } + return UR_RESULT_SUCCESS; + } - ur_mem_type_t 
getImageType() const noexcept { return ImageType; } + friend ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t, + const ur_device_handle_t); }; /// UR Mem mapping to HIP memory allocations, both data and texture/surface. /// \brief Represents non-SVM allocations on the HIP backend. /// Keeps tracks of all mapped regions used for Map/Unmap calls. /// Only one region can be active at the same time per allocation. +/// +/// The ur_mem_handle_t is responsible for memory allocation and migration +/// across devices in the same ur_context_handle_t. If a kernel writes to a +/// ur_mem_handle_t then it will write to LastEventWritingToMemObj. Then all +/// subsequent operations that want to read from the ur_mem_handle_t must wait +/// on the event referring to the last write. +/// +/// Since urMemBufferCreate/urMemImageCreate do not take a queue or device +/// object, only a ur_context_handle_t, at mem obj creation we don't know which +/// device we must make a native image/allocation on. Therefore no allocations +/// are made at urMemBufferCreate/urMemImageCreate. Instead device +/// images/allocations are made lazily. These allocations are made implicitly +/// with a call to getPtr/getArray which will allocate a new allocation/image on +/// device if need be. +/// +/// Memory migration between native allocations for devices in the same +/// ur_context_handle_t will occur at: +/// +/// 1. urEnqueueKernelLaunch +/// 2. urEnqueueMem(Buffer|Image)Read(Rect) +/// +/// Migrations will occur in both cases if the most recent version of data +/// is on a different device, marked by LastEventWritingToMemObj->getDevice(). +/// +/// Example trace: +/// ~~~~~~~~~~~~~~ +/// +/// =====> urContextCreate([device0, device1], ...) 
// associated with [q0, q1] +/// -> OUT: hContext +/// +/// =====> urMemBufferCreate(hContext,...); +/// -> No native allocations made +/// -> OUT: hBuffer +/// +/// =====> urEnqueueMemBufferWrite(q0, hBuffer,...); +/// -> Allocation made on q0 ie device0 +/// -> New allocation initialized with host data. +/// +/// =====> urKernelSetArgMemObj(hKernel0, hBuffer, ...); +/// -> ur_kernel_handle_t associated with a ur_program_handle_t, +/// which is in turn unique to a device. So we can set the kernel +/// arg with the ptr of the device specific allocation. +/// -> hKernel0->getProgram()->getDevice() == device0 +/// -> allocateMemObjOnDeviceIfNeeded(device0); +/// -> Native allocation already made on device0, continue. +/// +/// =====> urEnqueueKernelLaunch(q0, hKernel0, ...); +/// -> Suppose that hKernel0 writes to hBuffer. +/// -> Call hBuffer->setLastEventWritingToMemObj with return event +/// from this operation +/// -> Enqueue native kernel launch +/// +/// =====> urKernelSetArgMemObj(hKernel1, hBuffer, ...); +/// -> hKernel1->getProgram()->getDevice() == device1 +/// -> New allocation will be made on device1 when calling +/// getPtr(device1) +/// -> No native allocation on device1 +/// -> Make native allocation on device1 +/// +/// =====> urEnqueueKernelLaunch(q1, hKernel1, ...); +/// -> Suppose hKernel1 wants to read from hBuffer and not write. +/// -> migrateMemoryToDeviceIfNeeded(device1); +/// -> hBuffer->LastEventWritingToMemObj is not nullptr +/// -> Check if memory has been migrated to device1 since the +/// last write +/// -> Hasn't been migrated +/// -> Wait on LastEventWritingToMemObj. +/// -> Migrate memory from device0's native allocation to +/// device1's native allocation. 
+/// -> Enqueue native kernel launch +/// +/// =====> urEnqueueKernelLaunch(q0, hKernel0, ...); +/// -> migrateMemoryToDeviceIfNeeded(device0); +/// -> hBuffer->LastEventWritingToMemObj refers to an event +/// from q0 +/// -> Migration not necessary +/// -> Enqueue native kernel launch +/// struct ur_mem_handle_t_ { // TODO: Move as much shared data up as possible @@ -140,36 +383,76 @@ struct ur_mem_handle_t_ { /// Reference counting of the handler std::atomic_uint32_t RefCount; - enum class Type { Buffer, Surface } MemType; // Original mem flags passed ur_mem_flags_t MemFlags; + // If we make a ur_mem_handle_t_ from a native allocation, it can be useful to + // associate it with the device that holds the native allocation. + ur_device_handle_t DeviceWithNativeAllocation{nullptr}; + + // Has the memory been migrated to a device since the last write? + std::vector HaveMigratedToDeviceSinceLastWrite; + + // We should wait on this event prior to migrating memory across allocations + // in this ur_mem_handle_t_ + ur_event_handle_t LastEventWritingToMemObj{nullptr}; + + // Enumerates all possible types of accesses. + enum access_mode_t { unknown, read_write, read_only, write_only }; + + ur_mutex MemoryAllocationMutex; // A mutex for allocations + ur_mutex MemoryMigrationMutex; // A mutex for memory transfers + /// A UR Memory object represents either plain memory allocations ("Buffers" /// in OpenCL) or typed allocations ("Images" in OpenCL). /// In HIP their API handlers are different. Whereas "Buffers" are allocated /// as pointer-like structs, "Images" are stored in Textures or Surfaces. - /// This union allows implementation to use either from the same handler. + /// This variant allows implementation to use either from the same handler. 
std::variant Mem; - /// Constructs the UR MEM handler for a non-typed allocation ("buffer") - ur_mem_handle_t_(ur_context Ctxt, ur_mem Parent, ur_mem_flags_t MemFlags, - BufferMem::AllocMode Mode, hipDeviceptr_t Ptr, void *HostPtr, - size_t Size) - : Context{Ctxt}, RefCount{1}, MemType{Type::Buffer}, MemFlags{MemFlags}, - Mem{BufferMem{Parent, Mode, Ptr, HostPtr, Size}} { - if (isSubBuffer()) { - urMemRetain(std::get(Mem).Parent); - } else { - urContextRetain(Context); + /// Constructs the UR mem handler for a non-typed allocation ("buffer") + ur_mem_handle_t_(ur_context_handle_t Ctxt, ur_mem_flags_t MemFlags, + BufferMem::AllocMode Mode, void *HostPtr, size_t Size) + : Context{Ctxt}, RefCount{1}, MemFlags{MemFlags}, + HaveMigratedToDeviceSinceLastWrite(Context->Devices.size(), false), + Mem{std::in_place_type, Ctxt, this, Mode, HostPtr, Size} { + urContextRetain(Context); + }; + + // Subbuffer constructor + ur_mem_handle_t_(ur_mem Parent, size_t SubBufferOffset) + : Context{Parent->Context}, RefCount{1}, MemFlags{Parent->MemFlags}, + HaveMigratedToDeviceSinceLastWrite(Parent->Context->Devices.size(), + false), + Mem{BufferMem{std::get(Parent->Mem)}} { + auto &SubBuffer = std::get(Mem); + SubBuffer.Parent = Parent; + SubBuffer.OuterMemStruct = this; + if (SubBuffer.HostPtr) { + SubBuffer.HostPtr = + static_cast(SubBuffer.HostPtr) + SubBufferOffset; + } + for (auto &DevPtr : SubBuffer.Ptrs) { + if (DevPtr) { + DevPtr = static_cast(DevPtr) + SubBufferOffset; + } } + urMemRetain(Parent); }; - /// Constructs the UR allocation for an Image object - ur_mem_handle_t_(ur_context Ctxt, hipArray *Array, hipSurfaceObject_t Surf, - ur_mem_flags_t MemFlags, ur_mem_type_t ImageType, void *) - : Context{Ctxt}, RefCount{1}, MemType{Type::Surface}, MemFlags{MemFlags}, - Mem{SurfaceMem{Array, Surf, ImageType}} { + /// Constructs the UR mem handler for an Image object + ur_mem_handle_t_(ur_context Ctxt, ur_mem_flags_t MemFlags, + ur_image_format_t ImageFormat, ur_image_desc_t 
ImageDesc, + void *HostPtr) + : Context{Ctxt}, RefCount{1}, MemFlags{MemFlags}, + HaveMigratedToDeviceSinceLastWrite(Context->Devices.size(), false), + Mem{std::in_place_type, + Ctxt, + this, + ImageFormat, + ImageDesc, + HostPtr} { urContextRetain(Context); } @@ -181,13 +464,24 @@ struct ur_mem_handle_t_ { urContextRelease(Context); } - bool isBuffer() const noexcept { return MemType == Type::Buffer; } + bool isBuffer() const noexcept { + return std::holds_alternative(Mem); + } bool isSubBuffer() const noexcept { return (isBuffer() && (std::get(Mem).Parent != nullptr)); } - bool isImage() const noexcept { return MemType == Type::Surface; } + bool isImage() const noexcept { + return std::holds_alternative(Mem); + } + + ur_result_t clear() { + if (isBuffer()) { + return std::get(Mem).clear(); + } + return std::get(Mem).clear(); + } ur_context getContext() const noexcept { return Context; } @@ -196,4 +490,19 @@ struct ur_mem_handle_t_ { uint32_t decrementReferenceCount() noexcept { return --RefCount; } uint32_t getReferenceCount() const noexcept { return RefCount; } + + void setLastEventWritingToMemObj(ur_event_handle_t NewEvent) { + assert(NewEvent && "Invalid event!"); + // This entry point should only ever be called when using multi device ctx + assert(Context->Devices.size() > 1); + if (LastEventWritingToMemObj != nullptr) { + urEventRelease(LastEventWritingToMemObj); + } + urEventRetain(NewEvent); + LastEventWritingToMemObj = NewEvent; + for (const auto &Device : Context->getDevices()) { + HaveMigratedToDeviceSinceLastWrite[Device->getIndex()] = + Device == NewEvent->getDevice(); + } + } }; diff --git a/source/adapters/hip/platform.cpp b/source/adapters/hip/platform.cpp index 5f35b55f1f..287f941c30 100644 --- a/source/adapters/hip/platform.cpp +++ b/source/adapters/hip/platform.cpp @@ -47,9 +47,6 @@ urPlatformGetInfo(ur_platform_handle_t, ur_platform_info_t propName, /// There is only one HIP platform, and contains all devices on the system. 
/// Triggers the HIP Driver initialization (hipInit) the first time, so this /// must be the first UR API called. -/// -/// However because multiple devices in a context is not currently supported, -/// place each device in a separate platform. UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { @@ -57,7 +54,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, try { static std::once_flag InitFlag; static uint32_t NumPlatforms = 1; - static std::vector PlatformIds; + static ur_platform_handle_t_ Platform; UR_ASSERT(phPlatforms || pNumPlatforms, UR_RESULT_ERROR_INVALID_VALUE); UR_ASSERT(!phPlatforms || NumEntries > 0, UR_RESULT_ERROR_INVALID_VALUE); @@ -79,22 +76,18 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, return; } try { - // make one platform per device - NumPlatforms = NumDevices; - PlatformIds.resize(NumDevices); - - for (int i = 0; i < NumDevices; ++i) { + for (auto i = 0u; i < static_cast(NumDevices); ++i) { hipDevice_t Device; UR_CHECK_ERROR(hipDeviceGet(&Device, i)); hipCtx_t Context; UR_CHECK_ERROR(hipDevicePrimaryCtxRetain(&Context, Device)); - PlatformIds[i].Devices.emplace_back( - new ur_device_handle_t_{Device, Context, &PlatformIds[i]}); + Platform.Devices.emplace_back( + new ur_device_handle_t_{Device, Context, &Platform, i}); } // Setup EvBase { - ScopedContext Active(PlatformIds.front().Devices.front().get()); + ScopedContext Active(Platform.Devices.front().get()); hipEvent_t EvBase; UR_CHECK_ERROR(hipEventCreate(&EvBase)); UR_CHECK_ERROR(hipEventRecord(EvBase, 0)); @@ -103,17 +96,11 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, } } catch (const std::bad_alloc &) { // Signal out-of-memory situation - for (int i = 0; i < NumDevices; ++i) { - PlatformIds[i].Devices.clear(); - } - PlatformIds.clear(); + Platform.Devices.clear(); Err = 
UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (ur_result_t CatchErr) { // Clear and rethrow to allow retry - for (int i = 0; i < NumDevices; ++i) { - PlatformIds[i].Devices.clear(); - } - PlatformIds.clear(); + Platform.Devices.clear(); Err = CatchErr; throw CatchErr; } catch (...) { @@ -128,9 +115,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, } if (phPlatforms != nullptr) { - for (unsigned i = 0; i < std::min(NumEntries, NumPlatforms); ++i) { - phPlatforms[i] = &PlatformIds[i]; - } + *phPlatforms = &Platform; } return Result; diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 2c71c53208..0cf539602b 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -74,14 +74,6 @@ void getCoMgrBuildLog(const amd_comgr_data_set_t BuildDataSet, char *BuildLog, } // namespace #endif -ur_program_handle_t_::ur_program_handle_t_(ur_context_handle_t Ctxt) - : Module{nullptr}, Binary{}, BinarySizeInBytes{0}, RefCount{1}, Context{ - Ctxt} { - urContextRetain(Context); -} - -ur_program_handle_t_::~ur_program_handle_t_() { urContextRelease(Context); } - ur_result_t ur_program_handle_t_::setMetadata(const ur_program_metadata_t *Metadata, size_t Length) { @@ -135,8 +127,8 @@ ur_result_t ur_program_handle_t_::finalizeRelocatable() { std::string ISA = "amdgcn-amd-amdhsa--"; hipDeviceProp_t Props; - detail::ur::assertion(hipGetDeviceProperties( - &Props, Context->getDevice()->get()) == hipSuccess); + detail::ur::assertion(hipGetDeviceProperties(&Props, getDevice()->get()) == + hipSuccess); ISA += Props.gcnArchName; UR_CHECK_ERROR(amd_comgr_action_info_set_isa_name(Action, ISA.data())); @@ -222,18 +214,13 @@ ur_result_t getKernelNames(ur_program_handle_t) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -/// HIP will handle the PTX/HIPBIN binaries internally through hipModule_t -/// object. So, urProgramCreateWithIL and urProgramCreateWithBinary are -/// equivalent in terms of HIP adapter. 
See \ref urProgramCreateWithBinary. +/// A program must be specific to a device so this entry point is UNSUPPORTED UR_APIEXPORT ur_result_t UR_APICALL -urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL, - size_t length, const ur_program_properties_t *pProperties, - ur_program_handle_t *phProgram) { - ur_device_handle_t hDevice = hContext->getDevice(); - const auto pBinary = reinterpret_cast(pIL); - - return urProgramCreateWithBinary(hContext, hDevice, length, pBinary, - pProperties, phProgram); +urProgramCreateWithIL(ur_context_handle_t, const void *, size_t, + const ur_program_properties_t *, ur_program_handle_t *) { + detail::ur::die("urProgramCreateWithIL not implemented for HIP adapter" + " please use urProgramCreateWithBinary instead"); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } /// HIP will handle the PTX/HIPBIN binaries internally through a call to @@ -268,7 +255,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t, ur_result_t Result = UR_RESULT_SUCCESS; try { - ScopedContext Active(hProgram->getContext()->getDevice()); + ScopedContext Active(hProgram->getDevice()); hProgram->buildProgram(pOptions); @@ -340,7 +327,7 @@ urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, case UR_PROGRAM_INFO_NUM_DEVICES: return ReturnValue(1u); case UR_PROGRAM_INFO_DEVICES: - return ReturnValue(&hProgram->Context->DeviceId, 1); + return ReturnValue(hProgram->getDevice(), 1); case UR_PROGRAM_INFO_SOURCE: return ReturnValue(hProgram->Binary); case UR_PROGRAM_INFO_BINARY_SIZES: @@ -380,7 +367,7 @@ urProgramRelease(ur_program_handle_t hProgram) { ur_result_t Result = UR_RESULT_ERROR_INVALID_PROGRAM; try { - ScopedContext Active(hProgram->getContext()->getDevice()); + ScopedContext Active(hProgram->getDevice()); auto HIPModule = hProgram->get(); if (HIPModule) { UR_CHECK_ERROR(hipModuleUnload(HIPModule)); @@ -422,13 +409,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( const uint8_t *pBinary, 
const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram) { UR_ASSERT(pBinary != nullptr && size != 0, UR_RESULT_ERROR_INVALID_BINARY); - UR_ASSERT(hContext->getDevice()->get() == hDevice->get(), + UR_ASSERT(std::find(hContext->getDevices().begin(), + hContext->getDevices().end(), + hDevice) != hContext->getDevices().end(), UR_RESULT_ERROR_INVALID_CONTEXT); ur_result_t Result = UR_RESULT_SUCCESS; std::unique_ptr RetProgram{ - new ur_program_handle_t_{hContext}}; + new ur_program_handle_t_{hContext, hDevice}}; // TODO: Set metadata here and use reqd_work_group_size information. // See urProgramCreateWithBinary in CUDA adapter. @@ -469,8 +458,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( ur_device_handle_t hDevice, ur_program_handle_t hProgram, const char *pFunctionName, void **ppFunctionPointer) { // Check if device passed is the same the device bound to the context - UR_ASSERT(hDevice == hProgram->getContext()->getDevice(), - UR_RESULT_ERROR_INVALID_DEVICE); + UR_ASSERT(hDevice == hProgram->getDevice(), UR_RESULT_ERROR_INVALID_DEVICE); hipFunction_t Func; hipError_t Ret = hipModuleGetFunction(&Func, hProgram->get(), pFunctionName); diff --git a/source/adapters/hip/program.hpp b/source/adapters/hip/program.hpp index ff9b68fc92..4b4e5ec878 100644 --- a/source/adapters/hip/program.hpp +++ b/source/adapters/hip/program.hpp @@ -23,6 +23,7 @@ struct ur_program_handle_t_ { size_t BinarySizeInBytes; std::atomic_uint32_t RefCount; ur_context_handle_t Context; + ur_device_handle_t Device; std::string ExecutableCache; // Metadata @@ -34,8 +35,17 @@ struct ur_program_handle_t_ { std::string BuildOptions; ur_program_build_status_t BuildStatus = UR_PROGRAM_BUILD_STATUS_NONE; - ur_program_handle_t_(ur_context_handle_t Ctxt); - ~ur_program_handle_t_(); + ur_program_handle_t_(ur_context_handle_t Ctxt, ur_device_handle_t Device) + : Module{nullptr}, Binary{}, + BinarySizeInBytes{0}, RefCount{1}, Context{Ctxt}, Device{Device} { + 
urContextRetain(Context); + urDeviceRetain(Device); + } + + ~ur_program_handle_t_() { + urContextRelease(Context); + urDeviceRelease(Device); + } ur_result_t setMetadata(const ur_program_metadata_t *Metadata, size_t Length); @@ -44,6 +54,7 @@ struct ur_program_handle_t_ { ur_result_t buildProgram(const char *BuildOptions); ur_result_t finalizeRelocatable(); ur_context_handle_t getContext() const { return Context; }; + ur_device_handle_t getDevice() const { return Device; }; native_type get() const noexcept { return Module; }; diff --git a/source/adapters/hip/queue.cpp b/source/adapters/hip/queue.cpp index 910d7cf512..f01fc0e180 100644 --- a/source/adapters/hip/queue.cpp +++ b/source/adapters/hip/queue.cpp @@ -110,14 +110,13 @@ hipStream_t ur_queue_handle_t_::getNextTransferStream() { UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_properties_t *pProps, ur_queue_handle_t *phQueue) { + UR_ASSERT(std::find(hContext->getDevices().begin(), + hContext->getDevices().end(), + hDevice) != hContext->getDevices().end(), + UR_RESULT_ERROR_INVALID_CONTEXT); try { std::unique_ptr QueueImpl{nullptr}; - if (hContext->getDevice() != hDevice) { - *phQueue = nullptr; - return UR_RESULT_ERROR_INVALID_DEVICE; - } - unsigned int Flags = 0; const bool IsOutOfOrder = @@ -198,7 +197,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { if (!hQueue->backendHasOwnership()) return UR_RESULT_SUCCESS; - ScopedContext Active(hQueue->getContext()->getDevice()); + ScopedContext Active(hQueue->getDevice()); hQueue->forEachStream([](hipStream_t S) { UR_CHECK_ERROR(hipStreamSynchronize(S)); @@ -219,7 +218,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { try { - ScopedContext Active(hQueue->getContext()->getDevice()); + ScopedContext Active(hQueue->getDevice()); hQueue->syncStreams([&Result](hipStream_t S) { UR_CHECK_ERROR(hipStreamSynchronize(S)); @@ -251,7 +250,7 
@@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t) { UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *, ur_native_handle_t *phNativeQueue) { - ScopedContext Active(hQueue->getContext()->getDevice()); + ScopedContext Active(hQueue->getDevice()); *phNativeQueue = reinterpret_cast(hQueue->getNextComputeStream()); return UR_RESULT_SUCCESS; @@ -291,7 +290,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( new ur_queue_handle_t_{std::move(ComputeHIPStreams), std::move(TransferHIPStreams), hContext, - hContext->getDevice(), + hDevice, HIPFlags, Flags, /*backend_owns*/ pProperties->isNativeHandleOwned}; diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index bc3555d16b..a02f80957e 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -202,9 +202,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } - - pDdiTable->pfnInit = urInit; - pDdiTable->pfnTearDown = urTearDown; pDdiTable->pfnAdapterGet = urAdapterGet; pDdiTable->pfnAdapterGetInfo = urAdapterGetInfo; pDdiTable->pfnAdapterGetLastError = urAdapterGetLastError; @@ -278,17 +275,17 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendMemcpyUSMExp = urCommandBufferAppendMemcpyUSMExp; - pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - urCommandBufferAppendMembufferCopyRectExp; - pDdiTable->pfnAppendMembufferReadExp = urCommandBufferAppendMembufferReadExp; - pDdiTable->pfnAppendMembufferReadRectExp = - urCommandBufferAppendMembufferReadRectExp; - 
pDdiTable->pfnAppendMembufferWriteExp = - urCommandBufferAppendMembufferWriteExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - urCommandBufferAppendMembufferWriteRectExp; + pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + urCommandBufferAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferReadExp = urCommandBufferAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + urCommandBufferAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferWriteExp = + urCommandBufferAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + urCommandBufferAppendMemBufferWriteRectExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; @@ -307,6 +304,22 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( return retVal; } +// TODO: Implement +UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( + ur_api_version_t, ur_bindless_images_exp_dditable_t *) { + // This needs to return UR_RESULT_SUCCESS or else the platform can't be + // initialized + return UR_RESULT_SUCCESS; +} + +// TODO: Implement +UR_DLLEXPORT ur_result_t UR_APICALL +urGetUSMExpProcAddrTable(ur_api_version_t, ur_usm_exp_dditable_t *) { + // This needs to return UR_RESULT_SUCCESS or else the platform can't be + // initialized + return UR_RESULT_SUCCESS; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( ur_api_version_t version, ///< [in] API version requested ur_virtual_mem_dditable_t @@ -345,19 +358,42 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + 
pDdiTable->pfnCooperativeKernelLaunchExp = nullptr; + + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = nullptr; + + return UR_RESULT_SUCCESS; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_program_exp_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; + ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } + pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; - return retVal; + + return UR_RESULT_SUCCESS; } #if defined(__cplusplus) diff --git a/source/adapters/hip/usm.cpp b/source/adapters/hip/usm.cpp index 7af7401f87..e63379d13b 100644 --- a/source/adapters/hip/usm.cpp +++ b/source/adapters/hip/usm.cpp @@ -66,11 +66,10 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, return umfPoolMallocHelper(hPool, ppMem, size, alignment); } -UR_APIEXPORT ur_result_t UR_APICALL USMFreeImpl(ur_context_handle_t hContext, - void *pMem) { +UR_APIEXPORT ur_result_t UR_APICALL +USMFreeImpl([[maybe_unused]] ur_context_handle_t hContext, void *pMem) { ur_result_t Result = UR_RESULT_SUCCESS; try { - ScopedContext Active(hContext->getDevice()); hipPointerAttribute_t hipPointerAttributeType; UR_CHECK_ERROR(hipPointerGetAttributes(&hipPointerAttributeType, pMem)); unsigned int Type = hipPointerAttributeType.memoryType; @@ -98,12 +97,12 @@ UR_APIEXPORT 
ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, } } -ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context, - ur_device_handle_t, ur_usm_device_mem_flags_t *, - size_t Size, +ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t, + ur_device_handle_t Device, + ur_usm_device_mem_flags_t *, size_t Size, [[maybe_unused]] uint32_t Alignment) { try { - ScopedContext Active(Context->getDevice()); + ScopedContext Active(Device); UR_CHECK_ERROR(hipMalloc(ResultPtr, Size)); } catch (ur_result_t Err) { return Err; @@ -113,12 +112,13 @@ ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context, return UR_RESULT_SUCCESS; } -ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context, - ur_device_handle_t, ur_usm_host_mem_flags_t *, +ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t, + ur_device_handle_t Device, + ur_usm_host_mem_flags_t *, ur_usm_device_mem_flags_t *, size_t Size, [[maybe_unused]] uint32_t Alignment) { try { - ScopedContext Active(Context->getDevice()); + ScopedContext Active(Device); UR_CHECK_ERROR(hipMallocManaged(ResultPtr, Size, hipMemAttachGlobal)); } catch (ur_result_t Err) { return Err; @@ -128,11 +128,11 @@ ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context, return UR_RESULT_SUCCESS; } -ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t Context, +ur_result_t USMHostAllocImpl(void **ResultPtr, + [[maybe_unused]] ur_context_handle_t Context, ur_usm_host_mem_flags_t *, size_t Size, [[maybe_unused]] uint32_t Alignment) { try { - ScopedContext Active(Context->getDevice()); UR_CHECK_ERROR(hipHostMalloc(ResultPtr, Size)); } catch (ur_result_t Err) { return Err; @@ -152,7 +152,6 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); try { - ScopedContext Active(hContext->getDevice()); switch (propName) { case 
UR_USM_ALLOC_INFO_TYPE: { unsigned int Value; @@ -190,9 +189,6 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, #endif return ReturnValue(UR_USM_TYPE_UNKNOWN); } - case UR_USM_ALLOC_INFO_BASE_PTR: - case UR_USM_ALLOC_INFO_SIZE: - return UR_RESULT_ERROR_INVALID_VALUE; case UR_USM_ALLOC_INFO_DEVICE: { // get device index associated with this pointer UR_CHECK_ERROR(hipPointerGetAttributes(&hipPointerAttributeType, pMem)); @@ -222,6 +218,9 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, } return ReturnValue(Pool); } + case UR_USM_ALLOC_INFO_BASE_PTR: + case UR_USM_ALLOC_INFO_SIZE: + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } @@ -346,25 +345,26 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Host]) .second; - auto Device = Context->DeviceId; - MemProvider = - umf::memoryProviderMakeUnique(Context, Device) - .second; - DeviceMemPool = - umf::poolMakeUnique( - {std::move(MemProvider)}, - this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Device]) - .second; - - MemProvider = - umf::memoryProviderMakeUnique(Context, Device) - .second; - SharedMemPool = - umf::poolMakeUnique( - {std::move(MemProvider)}, - this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Shared]) - .second; - Context->addPool(this); + for (const auto &Device : Context->getDevices()) { + MemProvider = + umf::memoryProviderMakeUnique(Context, Device) + .second; + DeviceMemPool = + umf::poolMakeUnique( + {std::move(MemProvider)}, + this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Device]) + .second; + + MemProvider = + umf::memoryProviderMakeUnique(Context, Device) + .second; + SharedMemPool = + umf::poolMakeUnique( + {std::move(MemProvider)}, + this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Shared]) + .second; + Context->addPool(this); + } } bool 
ur_usm_pool_handle_t_::hasUMFPool(umf_memory_pool_t *umf_pool) { diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 67b1b26e7f..1850083caa 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -13,17 +13,6 @@ ur_adapter_handle_t_ Adapter{}; -UR_APIEXPORT ur_result_t UR_APICALL -urInit(ur_device_init_flags_t - DeviceFlags, ///< [in] device initialization flags. - ///< must be 0 (default) or a combination of - ///< ::ur_device_init_flag_t. - ur_loader_config_handle_t) { - std::ignore = DeviceFlags; - - return UR_RESULT_SUCCESS; -} - ur_result_t adapterStateTeardown() { // reclaim ur_platform_handle_t objects here since we don't have // urPlatformRelease. @@ -121,13 +110,6 @@ ur_result_t adapterStateTeardown() { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urTearDown( - void *Params ///< [in] pointer to tear down parameters -) { - std::ignore = Params; - return UR_RESULT_SUCCESS; -} - UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( uint32_t NumEntries, ///< [in] the number of platforms to be added to ///< phAdapters. 
If phAdapters is not NULL, then diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 7ba3cfae4d..e8f3b061f9 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -545,7 +545,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t CommandBuffer, void *Dst, const void *Src, size_t Size, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -555,7 +555,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t SrcMem, ur_mem_handle_t DstMem, size_t SrcOffset, size_t DstOffset, size_t Size, uint32_t NumSyncPointsInWaitList, @@ -581,7 +581,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t SrcMem, ur_mem_handle_t DstMem, ur_rect_offset_t SrcOrigin, ur_rect_offset_t DstOrigin, ur_rect_region_t Region, size_t SrcRowPitch, @@ -609,7 +609,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( DstSlicePitch, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( 
ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, size_t Offset, size_t Size, const void *Src, uint32_t NumSyncPointsInWaitList, @@ -628,7 +628,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( Size, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset, ur_rect_region_t Region, size_t BufferRowPitch, size_t BufferSlicePitch, @@ -648,7 +648,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( BufferSlicePitch, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, size_t Offset, size_t Size, void *Dst, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -663,7 +663,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( Size, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset, ur_rect_region_t Region, size_t BufferRowPitch, size_t BufferSlicePitch, diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index f5b00d80cc..acc7c755f4 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -88,6 +88,24 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urDeviceGet( return UR_RESULT_SUCCESS; } +uint64_t calculateGlobalMemSize(ur_device_handle_t Device) { + // Cache GlobalMemSize + Device->ZeGlobalMemSize.Compute = + [Device](struct ze_global_memsize &GlobalMemSize) { + for (const auto &ZeDeviceMemoryExtProperty : + Device->ZeDeviceMemoryProperties->second) { + GlobalMemSize.value += ZeDeviceMemoryExtProperty.physicalSize; + } + if (GlobalMemSize.value == 0) { + for (const auto &ZeDeviceMemoryProperty : + Device->ZeDeviceMemoryProperties->first) { + GlobalMemSize.value += ZeDeviceMemoryProperty.totalSize; + } + } + }; + return Device->ZeGlobalMemSize.operator->()->value; +} + UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( ur_device_handle_t Device, ///< [in] handle of the device instance ur_device_info_t ParamName, ///< [in] type of the info to retrieve @@ -249,22 +267,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(uint32_t{64}); } case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: - return ReturnValue(uint64_t{Device->ZeDeviceProperties->maxMemAllocSize}); + // if not optimized for 32-bit access, return total memory size. + // otherwise, return only maximum allocatable size. + if (Device->useOptimized32bitAccess() == 0) { + return ReturnValue(uint64_t{calculateGlobalMemSize(Device)}); + } else { + return ReturnValue(uint64_t{Device->ZeDeviceProperties->maxMemAllocSize}); + } case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: { - uint64_t GlobalMemSize = 0; // Support to read physicalSize depends on kernel, // so fallback into reading totalSize if physicalSize // is not available. 
- for (const auto &ZeDeviceMemoryExtProperty : - Device->ZeDeviceMemoryProperties->second) { - GlobalMemSize += ZeDeviceMemoryExtProperty.physicalSize; - } - if (GlobalMemSize == 0) { - for (const auto &ZeDeviceMemoryProperty : - Device->ZeDeviceMemoryProperties->first) { - GlobalMemSize += ZeDeviceMemoryProperty.totalSize; - } - } + uint64_t GlobalMemSize = calculateGlobalMemSize(Device); return ReturnValue(uint64_t{GlobalMemSize}); } case UR_DEVICE_INFO_LOCAL_MEM_SIZE: @@ -637,6 +651,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( static_cast(ZE_RESULT_ERROR_UNINITIALIZED)); return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } + // Calculate the global memory size as the max limit that can be reported as + // "free" memory for the user to allocate. + uint64_t GlobalMemSize = calculateGlobalMemSize(Device); // Only report device memory which zeMemAllocDevice can allocate from. // Currently this is only the one enumerated with ordinal 0. uint64_t FreeMemory = 0; @@ -661,7 +678,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( } } } - return ReturnValue(FreeMemory); + return ReturnValue(std::min(GlobalMemSize, FreeMemory)); } case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: { // If there are not any memory modules then return 0. @@ -900,6 +917,22 @@ ur_device_handle_t_::useImmediateCommandLists() { } } +int32_t ur_device_handle_t_::useOptimized32bitAccess() { + static const int32_t Optimize32bitAccessMode = [this] { + // If device is Intel(R) Data Center GPU Max, + // use default provided by L0 driver. + // TODO: Use IP versioning to select based on range of devices + if (this->isPVC()) + return -1; + const char *UrRet = std::getenv("UR_L0_USE_OPTIMIZED_32BIT_ACCESS"); + if (!UrRet) + return 0; + return std::atoi(UrRet); + }(); + + return Optimize32bitAccessMode; +} + ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, int SubSubDeviceIndex) { // Maintain various device properties cache. 
diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 35404c6525..5f34efab44 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -39,6 +39,10 @@ enum EventsScope { LastCommandInBatchHostVisible }; +struct ze_global_memsize { + uint64_t value; +}; + struct ur_device_handle_t_ : _ur_object { ur_device_handle_t_(ze_device_handle_t Device, ur_platform_handle_t Plt, ur_device_handle_t ParentDevice = nullptr) @@ -141,6 +145,22 @@ struct ur_device_handle_t_ : _ur_object { // Returns whether immediate command lists are used on this device. ImmCmdlistMode ImmCommandListUsed{}; + // Returns whether large allocations are being used + // or not to have a consistent behavior throughout + // the adapter between the creation of large allocations + // and the compilation of kernels into stateful and + // stateless modes. + // With stateful mode, kernels are compiled with + // pointer-arithmetic optimizations for optimized + // access of allocations smaller than 4GB. + // In stateless mode, such optimizations are not + // applied. + // Even if a GPU supports both modes, L0 driver may + // provide support for only one, like for Intel(R) + // Data Center GPU Max, for which L0 driver only + // supports stateless. + int32_t useOptimized32bitAccess(); + bool isSubDevice() { return RootDevice != nullptr; } // Is this a Data Center GPU Max series (aka PVC)? 
@@ -170,4 +190,5 @@ struct ur_device_handle_t_ : _ur_object { ZeDeviceMemoryAccessProperties; ZeCache> ZeDeviceCacheProperties; ZeCache> ZeDeviceIpVersionExt; + ZeCache ZeGlobalMemSize; }; diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index b979c8ab15..d8af1e674d 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -165,10 +165,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( // event signal because it is already guaranteed that previous commands // in this queue are completed when the signal is started. // + // Only consideration here is that when profiling is used, signalEvent + // cannot be used if EventWaitList.Length == 0. In those cases, we need + // to fall back directly to barrier to have correct timestamps. See here: + // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t + // // TODO: this and other special handling of in-order queues to be // updated when/if Level Zero adds native support for in-order queues. 
// - if (Queue->isInOrderQueue() && InOrderBarrierBySignal) { + if (Queue->isInOrderQueue() && InOrderBarrierBySignal && + !Queue->isProfilingEnabled()) { if (EventWaitList.Length) { ZE2UR_CALL(zeCommandListAppendWaitOnEvents, (CmdList->first, EventWaitList.Length, @@ -181,6 +187,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( (CmdList->first, Event->ZeEvent, EventWaitList.Length, EventWaitList.ZeEventList)); } + return UR_RESULT_SUCCESS; }; @@ -964,8 +971,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, bool HostVisible, ur_event_handle_t *RetEvent) { - bool ProfilingEnabled = - !Queue || (Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0; + bool ProfilingEnabled = !Queue || Queue->isProfilingEnabled(); if (auto CachedEvent = Context->getEventFromContextCache(HostVisible, ProfilingEnabled)) { diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 92a3c87aea..f118a5b9dd 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -148,9 +148,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( ZeModuleDesc.format = (hProgram->State == ur_program_handle_t_::IL) ? ZE_MODULE_FORMAT_IL_SPIRV : ZE_MODULE_FORMAT_NATIVE; + ZeModuleDesc.inputSize = hProgram->CodeLength; ZeModuleDesc.pInputModule = hProgram->Code.get(); - ZeModuleDesc.pBuildFlags = pOptions; + + // if large allocations are selected, then pass + // ze-opt-greater-than-4GB-buffer-required to disable + // stateful optimizations and be able to use larger than + // 4GB allocations on these kernels. 
+ std::string ZeBuildOptions{}; + if (pOptions) { + ZeBuildOptions += pOptions; + } + + if (phDevices[0]->useOptimized32bitAccess() == 0) { + ZeBuildOptions += " -ze-opt-greater-than-4GB-buffer-required"; + } + + ZeModuleDesc.pBuildFlags = ZeBuildOptions.c_str(); ZeModuleDesc.pConstants = Shim.ze(); ze_device_handle_t ZeDevice = phDevices[0]->ZeDevice; @@ -234,8 +249,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( // This produces better code because the driver can do cross-module // optimizations. Therefore, we just remember the compilation flags, so we // can use them later. - if (Options) + if (Options) { Program->BuildFlags = Options; + + // if large allocations are selected, then pass + // ze-opt-greater-than-4GB-buffer-required to disable + // stateful optimizations and be able to use larger than + // 4GB allocations on these kernels. + if (Context->Devices[0]->useOptimized32bitAccess() == 0) { + Program->BuildFlags += " -ze-opt-greater-than-4GB-buffer-required"; + } + } Program->State = ur_program_handle_t_::Object; return UR_RESULT_SUCCESS; diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 9c90a999b3..306cec5416 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -515,6 +515,11 @@ struct ur_queue_handle_t_ : _ur_object { // lists in the queue. 
ur_result_t insertStartBarrierIfDiscardEventsMode(ur_command_list_ptr_t &CmdList); + + // returns true if queue has profiling enabled + bool isProfilingEnabled() { + return ((this->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0); + } }; // This helper function creates a ur_event_handle_t and associate a diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index b508f7277c..09f4405744 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -11,6 +11,8 @@ #include #include +namespace { + ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { if (nullptr == pDdiTable) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; @@ -22,6 +24,11 @@ ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { } return UR_RESULT_SUCCESS; } +} // namespace + +#if defined(__cplusplus) +extern "C" { +#endif UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( ur_api_version_t version, ///< [in] API version requested @@ -32,9 +39,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( if (UR_RESULT_SUCCESS != retVal) { return retVal; } - - pDdiTable->pfnInit = urInit; - pDdiTable->pfnTearDown = urTearDown; pDdiTable->pfnAdapterGet = urAdapterGet; pDdiTable->pfnAdapterRelease = urAdapterRelease; pDdiTable->pfnAdapterRetain = urAdapterRetain; @@ -321,17 +325,17 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendMemcpyUSMExp = urCommandBufferAppendMemcpyUSMExp; - pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - urCommandBufferAppendMembufferCopyRectExp; - pDdiTable->pfnAppendMembufferReadExp = 
urCommandBufferAppendMembufferReadExp; - pDdiTable->pfnAppendMembufferReadRectExp = - urCommandBufferAppendMembufferReadRectExp; - pDdiTable->pfnAppendMembufferWriteExp = - urCommandBufferAppendMembufferWriteExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - urCommandBufferAppendMembufferWriteRectExp; + pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + urCommandBufferAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferReadExp = urCommandBufferAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + urCommandBufferAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferWriteExp = + urCommandBufferAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + urCommandBufferAppendMemBufferWriteRectExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; @@ -431,17 +435,43 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnCooperativeKernelLaunchExp = nullptr; + + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = nullptr; + + return UR_RESULT_SUCCESS; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_program_exp_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = 
validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; + ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } + pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; - return retVal; + + return UR_RESULT_SUCCESS; } +#if defined(__cplusplus) +} // extern "C" +#endif diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index daec0408fb..c6d98855e7 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -178,9 +178,12 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, ZeDesc.flags = 0; ZeDesc.ordinal = 0; + // Keep RelaxedDesc at function scope: ZeDesc.pNext stores its address. ZeStruct RelaxedDesc; - if (Size > Device->ZeDeviceProperties->maxMemAllocSize) { - // Tell Level-Zero to accept Size > maxMemAllocSize + if (Device->useOptimized32bitAccess() == 0 && + (Size > Device->ZeDeviceProperties->maxMemAllocSize)) { + // Tell Level-Zero to accept Size > maxMemAllocSize if + // large allocations are used. 
RelaxedDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE; ZeDesc.pNext = &RelaxedDesc; } diff --git a/source/adapters/native_cpu/adapter.cpp b/source/adapters/native_cpu/adapter.cpp index 920daf9944..622c3edc3d 100644 --- a/source/adapters/native_cpu/adapter.cpp +++ b/source/adapters/native_cpu/adapter.cpp @@ -15,15 +15,6 @@ struct ur_adapter_handle_t_ { std::atomic RefCount = 0; } Adapter; -UR_APIEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t, - ur_loader_config_handle_t) { - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urTearDown(void *) { - return UR_RESULT_SUCCESS; -} - UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( uint32_t, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { if (phAdapters) { diff --git a/source/adapters/native_cpu/command_buffer.cpp b/source/adapters/native_cpu/command_buffer.cpp index f79bf7e3c5..f13a57f392 100644 --- a/source/adapters/native_cpu/command_buffer.cpp +++ b/source/adapters/native_cpu/command_buffer.cpp @@ -56,7 +56,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t, void *, const void *, size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *) { @@ -65,7 +65,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, size_t, size_t, size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *) { @@ -74,7 +74,7 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urCommandBufferAppendMembufferCopyExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, size_t, size_t, size_t, size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, @@ -85,7 +85,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, size_t, size_t, const void *, uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *) { @@ -95,7 +95,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, size_t, size_t, void *, uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *) { @@ -105,7 +105,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, size_t, size_t, size_t, size_t, void *, uint32_t, const ur_exp_command_buffer_sync_point_t *, @@ -116,7 +116,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, 
ur_rect_offset_t, ur_rect_region_t, size_t, size_t, size_t, size_t, void *, uint32_t, const ur_exp_command_buffer_sync_point_t *, diff --git a/source/adapters/native_cpu/common.hpp b/source/adapters/native_cpu/common.hpp index 36ae7abd8b..d792cbbbcf 100644 --- a/source/adapters/native_cpu/common.hpp +++ b/source/adapters/native_cpu/common.hpp @@ -61,10 +61,16 @@ struct _ur_object { ur_shared_mutex Mutex; }; +// Todo: replace this with a common helper once it is available struct RefCounted { std::atomic_uint32_t _refCount; - void incrementReferenceCount() { _refCount++; } - void decrementReferenceCount() { _refCount--; } + uint32_t incrementReferenceCount() { return ++_refCount; } + uint32_t decrementReferenceCount() { return --_refCount; } RefCounted() : _refCount{1} {} uint32_t getReferenceCount() const { return _refCount; } }; + +template inline void decrementOrDelete(T *refC) { + if (refC->decrementReferenceCount() == 0) + delete refC; +} diff --git a/source/adapters/native_cpu/context.cpp b/source/adapters/native_cpu/context.cpp index e8732646f5..962525d1fc 100644 --- a/source/adapters/native_cpu/context.cpp +++ b/source/adapters/native_cpu/context.cpp @@ -32,13 +32,13 @@ urContextCreate(uint32_t DeviceCount, const ur_device_handle_t *phDevices, UR_APIEXPORT ur_result_t UR_APICALL urContextRetain(ur_context_handle_t hContext) { - std::ignore = hContext; - DIE_NO_IMPLEMENTATION + hContext->incrementReferenceCount(); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urContextRelease(ur_context_handle_t hContext) { - delete hContext; + decrementOrDelete(hContext); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/native_cpu/context.hpp b/source/adapters/native_cpu/context.hpp index 04404d7988..30bfb31d71 100644 --- a/source/adapters/native_cpu/context.hpp +++ b/source/adapters/native_cpu/context.hpp @@ -12,9 +12,10 @@ #include +#include "common.hpp" #include "device.hpp" -struct ur_context_handle_t_ { +struct ur_context_handle_t_ : 
RefCounted { ur_context_handle_t_(ur_device_handle_t_ *phDevices) : _device{phDevices} {} ur_device_handle_t _device; diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index 78540a1b90..a72d3032fb 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -60,7 +60,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - switch (propName) { + switch (static_cast(propName)) { case UR_DEVICE_INFO_TYPE: return ReturnValue(UR_DEVICE_TYPE_CPU); case UR_DEVICE_INFO_PARENT_DEVICE: diff --git a/source/adapters/native_cpu/enqueue.cpp b/source/adapters/native_cpu/enqueue.cpp index 24b30b4c67..d9e73c5453 100644 --- a/source/adapters/native_cpu/enqueue.cpp +++ b/source/adapters/native_cpu/enqueue.cpp @@ -144,12 +144,11 @@ static inline ur_result_t enqueueMemBufferReadWriteRect_impl( size_t host_origin = (d + HostOffset.z) * HostSlicePitch + (h + HostOffset.y) * HostRowPitch + w + HostOffset.x; - int8_t &host_mem = ur_cast(DstMem)[host_origin]; int8_t &buff_mem = ur_cast(Buff->_mem)[buff_orign]; - if (IsRead) - host_mem = buff_mem; + if constexpr (IsRead) + ur_cast(DstMem)[host_origin] = buff_mem; else - buff_mem = host_mem; + buff_mem = ur_cast(DstMem)[host_origin]; } return UR_RESULT_SUCCESS; } @@ -160,6 +159,8 @@ static inline ur_result_t doCopy_impl(ur_queue_handle_t hQueue, void *DstPtr, const ur_event_handle_t *EventWaitList, ur_event_handle_t *Event) { // todo: non-blocking, events, UR integration + std::ignore = EventWaitList; + std::ignore = Event; std::ignore = hQueue; std::ignore = numEventsInWaitList; if (SrcPtr != DstPtr && Size) diff --git a/source/adapters/native_cpu/kernel.cpp b/source/adapters/native_cpu/kernel.cpp index 96648e57f8..7bfd3c328c 100644 --- a/source/adapters/native_cpu/kernel.cpp +++ 
b/source/adapters/native_cpu/kernel.cpp @@ -25,7 +25,8 @@ urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, if (kernelEntry == hProgram->_kernels.end()) return UR_RESULT_ERROR_INVALID_KERNEL; - auto f = reinterpret_cast(kernelEntry->second); + auto f = reinterpret_cast( + const_cast(kernelEntry->second)); auto kernel = new ur_kernel_handle_t_(pKernelName, *f); *phKernel = kernel; @@ -171,6 +172,13 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, // todo: set proper values return ReturnValue(0); } + case UR_KERNEL_SUB_GROUP_INFO_FORCE_UINT32: { +#ifdef _MSC_VER + __assume(0); +#else + __builtin_unreachable(); +#endif + } } DIE_NO_IMPLEMENTATION; } @@ -182,7 +190,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) { UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease(ur_kernel_handle_t hKernel) { - delete hKernel; + decrementOrDelete(hKernel); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/native_cpu/memory.hpp b/source/adapters/native_cpu/memory.hpp index 10f443ca0c..45a28ccc67 100644 --- a/source/adapters/native_cpu/memory.hpp +++ b/source/adapters/native_cpu/memory.hpp @@ -61,6 +61,7 @@ struct _ur_buffer final : ur_mem_handle_t_ { : ur_mem_handle_t_(Size, false) {} _ur_buffer(_ur_buffer *b, size_t Offset, size_t Size) : ur_mem_handle_t_(b->_mem + Offset, false), SubBuffer(b) { + std::ignore = Size; SubBuffer.Origin = Offset; } diff --git a/source/adapters/native_cpu/program.cpp b/source/adapters/native_cpu/program.cpp index 63b8c988ba..ccd96a3a24 100644 --- a/source/adapters/native_cpu/program.cpp +++ b/source/adapters/native_cpu/program.cpp @@ -115,8 +115,7 @@ urProgramRetain(ur_program_handle_t hProgram) { UR_APIEXPORT ur_result_t UR_APICALL urProgramRelease(ur_program_handle_t hProgram) { - delete hProgram; - + decrementOrDelete(hProgram); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/native_cpu/queue.cpp b/source/adapters/native_cpu/queue.cpp index 
d4e85ce989..516e66db64 100644 --- a/source/adapters/native_cpu/queue.cpp +++ b/source/adapters/native_cpu/queue.cpp @@ -43,12 +43,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { std::ignore = hQueue; + hQueue->incrementReferenceCount(); - DIE_NO_IMPLEMENTATION; + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { - delete hQueue; + decrementOrDelete(hQueue); + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/native_cpu/queue.hpp b/source/adapters/native_cpu/queue.hpp index 0c7d812496..5e9039dd24 100644 --- a/source/adapters/native_cpu/queue.hpp +++ b/source/adapters/native_cpu/queue.hpp @@ -8,5 +8,6 @@ // //===----------------------------------------------------------------------===// #pragma once +#include "common.hpp" -struct ur_queue_handle_t_ {}; +struct ur_queue_handle_t_ : RefCounted {}; diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index 984676e684..1128317f96 100644 --- a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -200,8 +200,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnInit = urInit; - pDdiTable->pfnTearDown = urTearDown; pDdiTable->pfnAdapterGet = urAdapterGet; pDdiTable->pfnAdapterGetInfo = urAdapterGetInfo; pDdiTable->pfnAdapterRelease = urAdapterRelease; @@ -273,17 +271,17 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendMemcpyUSMExp = urCommandBufferAppendMemcpyUSMExp; - pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp; - 
pDdiTable->pfnAppendMembufferCopyRectExp = - urCommandBufferAppendMembufferCopyRectExp; - pDdiTable->pfnAppendMembufferReadExp = urCommandBufferAppendMembufferReadExp; - pDdiTable->pfnAppendMembufferReadRectExp = - urCommandBufferAppendMembufferReadRectExp; - pDdiTable->pfnAppendMembufferWriteExp = - urCommandBufferAppendMembufferWriteExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - urCommandBufferAppendMembufferWriteRectExp; + pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + urCommandBufferAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferReadExp = urCommandBufferAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + urCommandBufferAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferWriteExp = + urCommandBufferAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + urCommandBufferAppendMemBufferWriteRectExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; @@ -380,19 +378,41 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnCooperativeKernelLaunchExp = nullptr; + + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = nullptr; + + return UR_RESULT_SUCCESS; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( - ur_api_version_t version, ///< [in] API version 
requested - ur_program_exp_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; + ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } + pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; - return retVal; -} + return UR_RESULT_SUCCESS; +} } // extern "C" diff --git a/source/adapters/null/ur_null.cpp b/source/adapters/null/ur_null.cpp index 5a62761b67..d79b607ed1 100644 --- a/source/adapters/null/ur_null.cpp +++ b/source/adapters/null/ur_null.cpp @@ -38,21 +38,20 @@ context_t::context_t() { return UR_RESULT_SUCCESS; }; ////////////////////////////////////////////////////////////////////////// - urDdiTable.Platform.pfnGet = [](ur_adapter_handle_t *phAdapters, - uint32_t NumAdapters, uint32_t NumEntries, - ur_platform_handle_t *phPlatforms, - uint32_t *pNumPlatforms) { - if (phPlatforms != nullptr && NumEntries != 1) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - if (pNumPlatforms != nullptr) { - *pNumPlatforms = 1; - } - if (nullptr != phPlatforms) { - *reinterpret_cast(phPlatforms) = d_context.get(); - } - return UR_RESULT_SUCCESS; - }; + urDdiTable.Platform.pfnGet = + [](ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, + ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { + if (phPlatforms != nullptr && NumEntries != 1) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + if (pNumPlatforms != nullptr) { + *pNumPlatforms = 1; + } + if (nullptr != phPlatforms) { + *reinterpret_cast(phPlatforms) = d_context.get(); + } + return UR_RESULT_SUCCESS; + }; ////////////////////////////////////////////////////////////////////////// urDdiTable.Platform.pfnGetApiVersion = [](ur_platform_handle_t, @@ -122,8 +121,8 @@ 
context_t::context_t() { ////////////////////////////////////////////////////////////////////////// urDdiTable.Device.pfnGetInfo = - [](ur_device_handle_t hDevice, ur_device_info_t infoType, - size_t propSize, void *pDeviceInfo, size_t *pPropSizeRet) { + [](ur_device_handle_t, ur_device_info_t infoType, size_t propSize, + void *pDeviceInfo, size_t *pPropSizeRet) { switch (infoType) { case UR_DEVICE_INFO_TYPE: if (pDeviceInfo && propSize != sizeof(ur_device_type_t)) { @@ -165,25 +164,24 @@ context_t::context_t() { }; ////////////////////////////////////////////////////////////////////////// - urDdiTable.USM.pfnHostAlloc = - [](ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, - ur_usm_pool_handle_t pool, size_t size, void **ppMem) { - if (size == 0) { - *ppMem = nullptr; - return UR_RESULT_ERROR_UNSUPPORTED_SIZE; - } - *ppMem = malloc(size); - if (ppMem == nullptr) { - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return UR_RESULT_SUCCESS; - }; + urDdiTable.USM.pfnHostAlloc = [](ur_context_handle_t, const ur_usm_desc_t *, + ur_usm_pool_handle_t, size_t size, + void **ppMem) { + if (size == 0) { + *ppMem = nullptr; + return UR_RESULT_ERROR_UNSUPPORTED_SIZE; + } + *ppMem = malloc(size); + if (*ppMem == nullptr) { + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + return UR_RESULT_SUCCESS; + }; ////////////////////////////////////////////////////////////////////////// urDdiTable.USM.pfnDeviceAlloc = - [](ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, - size_t size, void **ppMem) { + [](ur_context_handle_t, ur_device_handle_t, const ur_usm_desc_t *, + ur_usm_pool_handle_t, size_t size, void **ppMem) { if (size == 0) { *ppMem = nullptr; return UR_RESULT_ERROR_UNSUPPORTED_SIZE; @@ -196,16 +194,15 @@ context_t::context_t() { }; ////////////////////////////////////////////////////////////////////////// - urDdiTable.USM.pfnFree = [](ur_context_handle_t hContext, void *pMem) { + 
urDdiTable.USM.pfnFree = [](ur_context_handle_t, void *pMem) { free(pMem); return UR_RESULT_SUCCESS; }; ////////////////////////////////////////////////////////////////////////// urDdiTable.USM.pfnGetMemAllocInfo = - [](ur_context_handle_t hContext, const void *pMem, - ur_usm_alloc_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { + [](ur_context_handle_t, const void *pMem, ur_usm_alloc_info_t propName, + size_t, void *pPropValue, size_t *pPropSizeRet) { switch (propName) { case UR_USM_ALLOC_INFO_TYPE: *reinterpret_cast(pPropValue) = diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index 8e95e26ccf..f016830d11 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -12,49 +12,6 @@ #include "ur_null.hpp" namespace driver { -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urInit -__urdlllocal ur_result_t UR_APICALL urInit( - ur_device_init_flags_t device_flags, ///< [in] device initialization flags. - ///< must be 0 (default) or a combination of ::ur_device_init_flag_t. - ur_loader_config_handle_t - hLoaderConfig ///< [in][optional] Handle of loader config handle. - ) try { - ur_result_t result = UR_RESULT_SUCCESS; - - // if the driver has created a custom function, then call it instead of using the generic path - auto pfnInit = d_context.urDdiTable.Global.pfnInit; - if (nullptr != pfnInit) { - result = pfnInit(device_flags, hLoaderConfig); - } else { - // generic implementation - } - - return result; -} catch (...) 
{ - return exceptionToResult(std::current_exception()); -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTearDown -__urdlllocal ur_result_t UR_APICALL urTearDown( - void *pParams ///< [in] pointer to tear down parameters - ) try { - ur_result_t result = UR_RESULT_SUCCESS; - - // if the driver has created a custom function, then call it instead of using the generic path - auto pfnTearDown = d_context.urDdiTable.Global.pfnTearDown; - if (nullptr != pfnTearDown) { - result = pfnTearDown(pParams); - } else { - // generic implementation - } - - return result; -} catch (...) { - return exceptionToResult(std::current_exception()); -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urAdapterGet __urdlllocal ur_result_t UR_APICALL urAdapterGet( @@ -363,8 +320,8 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGet( ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices. - ///< If phDevices in not NULL then NumEntries should be greater than zero, - ///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE, + ///< If phDevices is not NULL, then NumEntries should be greater than zero. + ///< Otherwise ::UR_RESULT_ERROR_INVALID_SIZE ///< will be returned. ur_device_handle_t * phDevices, ///< [out][optional][range(0, NumEntries)] array of handle of devices. 
@@ -2960,7 +2917,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// @brief Intercept function for urEnqueueMemBufferRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -2999,7 +2957,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( /// @brief Intercept function for urEnqueueMemBufferWrite __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -3040,7 +2999,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( /// @brief Intercept function for urEnqueueMemBufferReadRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -3093,7 +3053,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// @brief Intercept function for urEnqueueMemBufferWriteRect __urdlllocal ur_result_t 
UR_APICALL urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer @@ -3148,9 +3109,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the src buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object size_t srcOffset, ///< [in] offset into hBufferSrc to begin copying from size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into size_t size, ///< [in] size in bytes of data being copied @@ -3187,9 +3150,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopyRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the source buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOrigin, 
region)] handle of the source buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the dest buffer object ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer ur_rect_region_t @@ -3238,10 +3203,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// @brief Intercept function for urEnqueueMemBufferFill __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object - const void *pPattern, ///< [in] pointer to the fill pattern - size_t patternSize, ///< [in] size in bytes of the pattern - size_t offset, ///< [in] offset into the buffer + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + const void *pPattern, ///< [in] pointer to the fill pattern + size_t patternSize, ///< [in] size in bytes of the pattern + size_t offset, ///< [in] offset into the buffer size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -3277,7 +3243,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( /// @brief Intercept function for urEnqueueMemImageRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image @@ -3321,7 +3288,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( /// @brief Intercept function for urEnqueueMemImageWrite __urdlllocal ur_result_t 
UR_APICALL urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t @@ -3365,9 +3333,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemImageCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImageSrc, ///< [in] handle of the src image object - ur_mem_handle_t hImageDst, ///< [in] handle of the dest image object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hImageSrc, ///< [in][bounds(srcOrigin, region)] handle of the src image object + ur_mem_handle_t + hImageDst, ///< [in][bounds(dstOrigin, region)] handle of the dest image object ur_rect_offset_t srcOrigin, ///< [in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D ///< image @@ -3411,7 +3381,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( /// @brief Intercept function for urEnqueueMemBufferMap __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t mapFlags, ///< [in] flags for read, write, readwrite mapping size_t offset, ///< [in] offset in bytes of the buffer region being mapped @@ -3488,7 +3459,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap( /// @brief Intercept function for urEnqueueUSMFill __urdlllocal ur_result_t 
UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - void *ptr, ///< [in] pointer to USM memory object + void *pMem, ///< [in][bounds(0, size)] pointer to USM memory object size_t patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. @@ -3511,7 +3482,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( // if the driver has created a custom function, then call it instead of using the generic path auto pfnUSMFill = d_context.urDdiTable.Enqueue.pfnUSMFill; if (nullptr != pfnUSMFill) { - result = pfnUSMFill(hQueue, ptr, patternSize, pPattern, size, + result = pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, phEvent); } else { // generic implementation @@ -3530,9 +3501,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object bool blocking, ///< [in] blocking or non-blocking copy - void *pDst, ///< [in] pointer to the destination USM memory object - const void *pSrc, ///< [in] pointer to the source USM memory object - size_t size, ///< [in] size in bytes to be copied + void * + pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void * + pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object + size_t size, ///< [in] size in bytes to be copied uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -3565,9 +3538,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMPrefetch __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, ///< [in] handle of the queue 
object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be fetched + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -3601,9 +3575,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMAdvise __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be advised + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t advice, ///< [in] USM memory advice ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular @@ -3631,7 +3606,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( /// @brief Intercept function for urEnqueueUSMFill2D __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. - void *pMem, ///< [in] pointer to memory to be filled. + void * + pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. size_t pitch, ///< [in] the total width of the destination memory including padding. size_t @@ -3678,10 +3654,13 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. 
bool blocking, ///< [in] indicates if this operation should block the host. - void *pDst, ///< [in] pointer to memory where data will be copied. + void * + pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. size_t dstPitch, ///< [in] the total width of the source memory including padding. - const void *pSrc, ///< [in] pointer to memory to be copied. + const void * + pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. size_t srcPitch, ///< [in] the total width of the source memory including padding. size_t width, ///< [in] the width in bytes of each row to be copied. @@ -3875,7 +3854,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( ///< events that must be complete before the host pipe write. ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. ur_event_handle_t * - phEvent ///< [out] returns an event object that identifies this write command + phEvent ///< [out][optional] returns an event object that identifies this write command ///< and can be used to query or queue a wait for this command to complete. 
) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -3888,7 +3867,9 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( phEvent); } else { // generic implementation - *phEvent = reinterpret_cast(d_context.get()); + if (nullptr != phEvent) { + *phEvent = reinterpret_cast(d_context.get()); + } } return result; @@ -4588,8 +4569,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMemcpyUSMExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +/// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. @@ -4605,10 +4586,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( ur_result_t result = UR_RESULT_SUCCESS; // if the driver has created a custom function, then call it instead of using the generic path - auto pfnAppendMemcpyUSMExp = - d_context.urDdiTable.CommandBufferExp.pfnAppendMemcpyUSMExp; - if (nullptr != pfnAppendMemcpyUSMExp) { - result = pfnAppendMemcpyUSMExp(hCommandBuffer, pDst, pSrc, size, + auto pfnAppendUSMMemcpyExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; + if (nullptr != pfnAppendUSMMemcpyExp) { + result = pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } else { @@ -4621,8 +4602,43 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferCopyExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +/// @brief Intercept function for 
urCommandBufferAppendUSMFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + void *pMemory, ///< [in] pointer to USM allocated memory to fill. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnAppendUSMFillExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendUSMFillExp; + if (nullptr != pfnAppendUSMFillExp) { + result = pfnAppendUSMFillExp(hCommandBuffer, pMemory, pPattern, + patternSize, size, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -4640,10 +4656,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( ur_result_t result = UR_RESULT_SUCCESS; // if the driver has created a custom function, then call it instead of using the generic path - auto pfnAppendMembufferCopyExp = - d_context.urDdiTable.CommandBufferExp.pfnAppendMembufferCopyExp; - if (nullptr != pfnAppendMembufferCopyExp) { - result = pfnAppendMembufferCopyExp( + auto pfnAppendMemBufferCopyExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; + if (nullptr != pfnAppendMemBufferCopyExp) { + result = pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } else { @@ -4656,8 +4672,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -4675,10 +4691,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( ur_result_t result = UR_RESULT_SUCCESS; // if the driver has created a custom function, then call it instead of using the generic path - auto pfnAppendMembufferWriteExp = - d_context.urDdiTable.CommandBufferExp.pfnAppendMembufferWriteExp; - if (nullptr != pfnAppendMembufferWriteExp) { - result = pfnAppendMembufferWriteExp(hCommandBuffer, hBuffer, offset, + auto pfnAppendMemBufferWriteExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; + if (nullptr != pfnAppendMemBufferWriteExp) { + result = pfnAppendMemBufferWriteExp(hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } else { @@ -4691,8 +4707,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferReadExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -4709,10 +4725,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( ur_result_t result = UR_RESULT_SUCCESS; // if the driver has created a custom function, then call it instead of using the generic path - auto pfnAppendMembufferReadExp = - d_context.urDdiTable.CommandBufferExp.pfnAppendMembufferReadExp; - if (nullptr != pfnAppendMembufferReadExp) { - result = pfnAppendMembufferReadExp(hCommandBuffer, hBuffer, offset, + auto pfnAppendMemBufferReadExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; + if (nullptr != pfnAppendMemBufferReadExp) { + result = pfnAppendMemBufferReadExp(hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } else { @@ -4725,8 +4741,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferCopyRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -4751,10 +4767,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( ur_result_t result = UR_RESULT_SUCCESS; // if the driver has created a custom function, then call it instead of using the generic path - auto pfnAppendMembufferCopyRectExp = - d_context.urDdiTable.CommandBufferExp.pfnAppendMembufferCopyRectExp; - if (nullptr != pfnAppendMembufferCopyRectExp) { - result = pfnAppendMembufferCopyRectExp( + auto pfnAppendMemBufferCopyRectExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; + if (nullptr != pfnAppendMemBufferCopyRectExp) { + result = pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -4768,8 +4784,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -4800,10 +4816,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( ur_result_t result = UR_RESULT_SUCCESS; // if the driver has created a custom function, then call it instead of using the generic path - auto pfnAppendMembufferWriteRectExp = - d_context.urDdiTable.CommandBufferExp.pfnAppendMembufferWriteRectExp; - if (nullptr != pfnAppendMembufferWriteRectExp) { - result = pfnAppendMembufferWriteRectExp( + auto pfnAppendMemBufferWriteRectExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; + if (nullptr != pfnAppendMemBufferWriteRectExp) { + result = pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -4817,8 +4833,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -4847,10 +4863,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( ur_result_t result = UR_RESULT_SUCCESS; // if the driver has created a custom function, then call it instead of using the generic path - auto pfnAppendMembufferReadRectExp = - d_context.urDdiTable.CommandBufferExp.pfnAppendMembufferReadRectExp; - if (nullptr != pfnAppendMembufferReadRectExp) { - result = pfnAppendMembufferReadRectExp( + auto pfnAppendMemBufferReadRectExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; + if (nullptr != pfnAppendMemBufferReadRectExp) { + result = pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -4863,6 +4879,108 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t offset, ///< [in] offset into the buffer. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. 
+ ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnAppendMemBufferFillExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendMemBufferFillExp; + if (nullptr != pfnAppendMemBufferFillExp) { + result = pfnAppendMemBufferFillExp( + hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMPrefetchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to USM allocated memory to prefetch. + size_t size, ///< [in] size in bytes to be fetched. + ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnAppendUSMPrefetchExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendUSMPrefetchExp; + if (nullptr != pfnAppendUSMPrefetchExp) { + result = pfnAppendUSMPrefetchExp(hCommandBuffer, pMemory, size, flags, + numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + } else { + // generic implementation + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMAdviseExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to the USM memory object. + size_t size, ///< [in] size in bytes to be advised. + ur_usm_advice_flags_t advice, ///< [in] USM memory advice + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnAppendUSMAdviseExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendUSMAdviseExp; + if (nullptr != pfnAppendUSMAdviseExp) { + result = pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, advice, + numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + } else { + // generic implementation + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( @@ -4899,6 +5017,80 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t + workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and + ///< work-group work-items + const size_t * + pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t * + pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t * + pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the number of local work-items forming a work-group that will + ///< execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. 
+ ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnCooperativeKernelLaunchExp = + d_context.urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp; + if (nullptr != pfnCooperativeKernelLaunchExp) { + result = pfnCooperativeKernelLaunchExp( + hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); + } else { + // generic implementation + if (nullptr != phEvent) { + *phEvent = reinterpret_cast(d_context.get()); + } + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp +__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnSuggestMaxCooperativeGroupCountExp = + d_context.urDdiTable.KernelExp.pfnSuggestMaxCooperativeGroupCountExp; + if (nullptr != pfnSuggestMaxCooperativeGroupCountExp) { + result = pfnSuggestMaxCooperativeGroupCountExp(hKernel, pGroupCountRet); + } else { + // generic implementation + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramBuildExp __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( @@ -5135,10 +5327,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - pDdiTable->pfnInit = driver::urInit; - - pDdiTable->pfnTearDown = driver::urTearDown; - pDdiTable->pfnAdapterGet = driver::urAdapterGet; pDdiTable->pfnAdapterRelease = driver::urAdapterRelease; @@ -5259,26 +5447,37 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendKernelLaunchExp = driver::urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendMemcpyUSMExp = - driver::urCommandBufferAppendMemcpyUSMExp; + pDdiTable->pfnAppendUSMMemcpyExp = + driver::urCommandBufferAppendUSMMemcpyExp; - pDdiTable->pfnAppendMembufferCopyExp = - driver::urCommandBufferAppendMembufferCopyExp; + pDdiTable->pfnAppendUSMFillExp = driver::urCommandBufferAppendUSMFillExp; - pDdiTable->pfnAppendMembufferWriteExp = - driver::urCommandBufferAppendMembufferWriteExp; + pDdiTable->pfnAppendMemBufferCopyExp = + driver::urCommandBufferAppendMemBufferCopyExp; - pDdiTable->pfnAppendMembufferReadExp = - driver::urCommandBufferAppendMembufferReadExp; + pDdiTable->pfnAppendMemBufferWriteExp = + driver::urCommandBufferAppendMemBufferWriteExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - driver::urCommandBufferAppendMembufferCopyRectExp; + pDdiTable->pfnAppendMemBufferReadExp = + driver::urCommandBufferAppendMemBufferReadExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - driver::urCommandBufferAppendMembufferWriteRectExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + driver::urCommandBufferAppendMemBufferCopyRectExp; - pDdiTable->pfnAppendMembufferReadRectExp = - driver::urCommandBufferAppendMembufferReadRectExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + 
driver::urCommandBufferAppendMemBufferWriteRectExp; + + pDdiTable->pfnAppendMemBufferReadRectExp = + driver::urCommandBufferAppendMemBufferReadRectExp; + + pDdiTable->pfnAppendMemBufferFillExp = + driver::urCommandBufferAppendMemBufferFillExp; + + pDdiTable->pfnAppendUSMPrefetchExp = + driver::urCommandBufferAppendUSMPrefetchExp; + + pDdiTable->pfnAppendUSMAdviseExp = + driver::urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = driver::urCommandBufferEnqueueExp; @@ -5411,6 +5610,37 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EnqueueExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_enqueue_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers + ) try { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (driver::d_context.version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnCooperativeKernelLaunchExp = + driver::urEnqueueCooperativeKernelLaunchExp; + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Event table /// with current process' addresses @@ -5516,6 +5746,37 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's KernelExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_kernel_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers + ) try { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (driver::d_context.version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = + driver::urKernelSuggestMaxCooperativeGroupCountExp; + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Mem table /// with current process' addresses diff --git a/source/adapters/opencl/CMakeLists.txt b/source/adapters/opencl/CMakeLists.txt index dc43a68ffa..5feb673175 100644 --- a/source/adapters/opencl/CMakeLists.txt +++ b/source/adapters/opencl/CMakeLists.txt @@ -5,65 +5,96 @@ set(OPENCL_DIR "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH "OpenCL adapter directory") +set(UR_OPENCL_INCLUDE_DIR "" CACHE PATH "Directory containing the OpenCL Headers") +set(UR_OPENCL_ICD_LOADER_LIBRARY "" CACHE FILEPATH "Path of the OpenCL ICD Loader library") + +find_package(Threads REQUIRED) + set(TARGET_NAME ur_adapter_opencl) -add_ur_adapter(${TARGET_NAME} - SHARED - ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/common.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/context.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/context.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/device.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/device.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/enqueue.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp +add_ur_adapter(${TARGET_NAME} SHARED + 
${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/common.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/context.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/context.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/device.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/device.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/enqueue.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp ) set_target_properties(${TARGET_NAME} PROPERTIES - VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}" - SOVERSION "${PROJECT_VERSION_MAJOR}" + VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}" + SOVERSION "${PROJECT_VERSION_MAJOR}" ) -find_package(Threads REQUIRED) +if(UR_OPENCL_INCLUDE_DIR) + set(OpenCLIncludeDirectory ${UR_OPENCL_INCLUDE_DIR}) +else() + FetchContent_Declare(OpenCL-Headers + GIT_REPOSITORY "https://github.com/KhronosGroup/OpenCL-Headers.git" + GIT_TAG main + ) + FetchContent_MakeAvailable(OpenCL-Headers) + FetchContent_GetProperties(OpenCL-Headers + SOURCE_DIR OpenCLIncludeDirectory + ) +endif() -# The OpenCL target can be set manually on upstream cmake to avoid using find_package(). 
-if (NOT UR_OPENCL_ICD_LOADER_LIBRARY) - find_package(OpenCL REQUIRED) - message(STATUS "OpenCL_LIBRARY: ${OpenCL_LIBRARY}") - message(STATUS "OpenCL_INCLUDE_DIR: ${OpenCL_INCLUDE_DIR}") - set(UR_OPENCL_ICD_LOADER_LIBRARY OpenCL::OpenCL) +# The OpenCL target can be set manually on upstream cmake to avoid using +# find_package(). +if(UR_OPENCL_ICD_LOADER_LIBRARY) + set(OpenCLICDLoaderLibrary ${UR_OPENCL_ICD_LOADER_LIBRARY}) +else() + find_package(OpenCL 3.0) + if(OpenCL_FOUND) + set(OpenCLICDLoaderLibrary OpenCL::OpenCL) + else() + FetchContent_Declare(OpenCL-ICD-Loader + GIT_REPOSITORY "https://github.com/KhronosGroup/OpenCL-ICD-Loader.git" + GIT_TAG main + ) + FetchContent_MakeAvailable(OpenCL-ICD-Loader) + set(OpenCLICDLoaderLibrary ${PROJECT_BINARY_DIR}/lib/libOpenCL.so) + endif() endif() +message(STATUS "OpenCL Include Directory: ${OpenCLIncludeDirectory}") +message(STATUS "OpenCL ICD Loader Library: ${OpenCLICDLoaderLibrary}") + # Suppress a compiler message about undefined CL_TARGET_OPENCL_VERSION. # Define all symbols up to OpenCL 3.0. 
-target_compile_definitions(ur_adapter_opencl PRIVATE CL_TARGET_OPENCL_VERSION=300 CL_USE_DEPRECATED_OPENCL_1_2_APIS) - -target_link_libraries(${TARGET_NAME} PRIVATE - ${PROJECT_NAME}::headers - ${PROJECT_NAME}::common - ${PROJECT_NAME}::unified_malloc_framework - Threads::Threads - ${UR_OPENCL_ICD_LOADER_LIBRARY} +target_compile_definitions(ur_adapter_opencl PRIVATE + CL_TARGET_OPENCL_VERSION=300 + CL_USE_DEPRECATED_OPENCL_1_2_APIS ) target_include_directories(${TARGET_NAME} PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/../../" - ${OpenCL_INCLUDE_DIR} + ${OpenCLIncludeDirectory} + "${CMAKE_CURRENT_SOURCE_DIR}/../../" +) + +target_link_libraries(${TARGET_NAME} PRIVATE + ${PROJECT_NAME}::headers + ${PROJECT_NAME}::common + ${PROJECT_NAME}::unified_malloc_framework + Threads::Threads + ${OpenCLICDLoaderLibrary} ) diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index 65c5676bf9..8ae1e77755 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -17,15 +17,6 @@ struct ur_adapter_handle_t_ { ur_adapter_handle_t_ adapter{}; -UR_APIEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t, - ur_loader_config_handle_t) { - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urTearDown(void *) { - return UR_RESULT_SUCCESS; -} - UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { @@ -75,7 +66,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, switch (propName) { case UR_ADAPTER_INFO_BACKEND: - return ReturnValue(UR_ADAPTER_BACKEND_CUDA); + return ReturnValue(UR_ADAPTER_BACKEND_OPENCL); case UR_ADAPTER_INFO_REFERENCE_COUNT: return ReturnValue(adapter.RefCount.load()); default: diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 121a991cbd..56b4d16b88 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ 
b/source/adapters/opencl/command_buffer.cpp @@ -11,61 +11,121 @@ #include "command_buffer.hpp" #include "common.hpp" -/// Stub implementations of UR experimental feature command-buffers - UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, + ur_context_handle_t hContext, ur_device_handle_t hDevice, [[maybe_unused]] const ur_exp_command_buffer_desc_t *pCommandBufferDesc, - [[maybe_unused]] ur_exp_command_buffer_handle_t *phCommandBuffer) { + ur_exp_command_buffer_handle_t *phCommandBuffer) { - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_queue_handle_t Queue = nullptr; + UR_RETURN_ON_FAILURE(urQueueCreate(hContext, hDevice, nullptr, &Queue)); + + cl_context CLContext = cl_adapter::cast(hContext); + cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCreateCommandBufferKHRCache, + cl_ext::CreateCommandBufferName, &clCreateCommandBufferKHR); + + if (!clCreateCommandBufferKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + auto CLCommandBuffer = clCreateCommandBufferKHR( + 1, cl_adapter::cast(&Queue), nullptr, &Res); + CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer); + + try { + auto URCommandBuffer = std::make_unique( + Queue, hContext, CLCommandBuffer); + *phCommandBuffer = URCommandBuffer.release(); + } catch (...) 
{ + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } + + CL_RETURN_ON_FAILURE(Res); + return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { + UR_RETURN_ON_FAILURE(urQueueRetain(hCommandBuffer->hInternalQueue)); - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clRetainCommandBufferKHR_fn clRetainCommandBuffer = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clRetainCommandBufferKHRCache, + cl_ext::RetainCommandBufferName, &clRetainCommandBuffer); + + if (!clRetainCommandBuffer || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE(clRetainCommandBuffer(hCommandBuffer->CLCommandBuffer)); + return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { + UR_RETURN_ON_FAILURE(urQueueRelease(hCommandBuffer->hInternalQueue)); - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clReleaseCommandBufferKHRCache, + cl_ext::ReleaseCommandBufferName, &clReleaseCommandBufferKHR); + + if (!clReleaseCommandBufferKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + 
CL_RETURN_ON_FAILURE( + clReleaseCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); + return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clFinalizeCommandBufferKHRCache, + cl_ext::FinalizeCommandBufferName, &clFinalizeCommandBufferKHR); - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + if (!clFinalizeCommandBufferKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE( + clFinalizeCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] ur_kernel_handle_t hKernel, - [[maybe_unused]] uint32_t workDim, - [[maybe_unused]] const size_t *pGlobalWorkOffset, - [[maybe_unused]] const size_t *pGlobalWorkSize, - [[maybe_unused]] const size_t *pLocalWorkSize, - [[maybe_unused]] uint32_t numSyncPointsInWaitList, - [[maybe_unused]] const ur_exp_command_buffer_sync_point_t - *pSyncPointWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, + uint32_t workDim, const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { - 
cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCommandNDRangeKernelKHRCache, + cl_ext::CommandNRRangeKernelName, &clCommandNDRangeKernelKHR); + + if (!clCommandNDRangeKernelKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR( + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, + cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, + pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint, nullptr)); + + return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] void *pDst, [[maybe_unused]] const void *pSrc, [[maybe_unused]] size_t size, @@ -73,28 +133,46 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] ur_mem_handle_t hSrcMem, - [[maybe_unused]] ur_mem_handle_t hDstMem, [[maybe_unused]] size_t srcOffset, - [[maybe_unused]] size_t dstOffset, [[maybe_unused]] size_t size, + [[maybe_unused]] void *pMemory, 
[[maybe_unused]] const void *pPattern, + [[maybe_unused]] size_t patternSize, [[maybe_unused]] size_t size, [[maybe_unused]] uint32_t numSyncPointsInWaitList, [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache, + cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR); + + if (!clCommandCopyBufferKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR( + hCommandBuffer->CLCommandBuffer, nullptr, + cl_adapter::cast(hSrcMem), cl_adapter::cast(hDstMem), + srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, + pSyncPoint, nullptr)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hSrcMem, [[maybe_unused]] ur_mem_handle_t hDstMem, @@ -108,13 +186,32 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( *pSyncPointWaitList, [[maybe_unused]] 
ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + size_t OpenCLOriginRect[3]{srcOrigin.x, srcOrigin.y, srcOrigin.z}; + size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z}; + size_t OpenCLRegion[3]{region.width, region.height, region.depth}; + + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache, + cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR); + + if (!clCommandCopyBufferRectKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR( + hCommandBuffer->CLCommandBuffer, nullptr, + cl_adapter::cast(hSrcMem), cl_adapter::cast(hDstMem), + OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch, + dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, + pSyncPoint, nullptr)); + + return UR_RESULT_SUCCESS; } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] size_t offset, [[maybe_unused]] size_t size, [[maybe_unused]] const void *pSrc, @@ -129,7 +226,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] size_t offset, [[maybe_unused]] size_t size, [[maybe_unused]] void *pDst, @@ -137,14 +234,11 @@ ur_result_t 
UR_APICALL urCommandBufferAppendMembufferReadExp( [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] ur_rect_offset_t bufferOffset, @@ -158,14 +252,11 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] ur_rect_offset_t bufferOffset, @@ -179,20 +270,55 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, size_t offset, size_t size, + uint32_t numSyncPointsInWaitList, + const 
ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache, + cl_ext::CommandFillBufferName, &clCommandFillBufferKHR); + + if (!clCommandFillBufferKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE(clCommandFillBufferKHR( + hCommandBuffer->CLCommandBuffer, nullptr, + cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] ur_queue_handle_t hQueue, - [[maybe_unused]] uint32_t numEventsInWaitList, - [[maybe_unused]] const ur_event_handle_t *phEventWaitList, - [[maybe_unused]] ur_event_handle_t *phEvent) { + ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueCommandBufferKHRCache, + cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR); + + if (!clEnqueueCommandBufferKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + const uint32_t NumberOfQueues = 1; + + CL_RETURN_ON_FAILURE(clEnqueueCommandBufferKHR( + NumberOfQueues, 
cl_adapter::cast(&hQueue), + hCommandBuffer->CLCommandBuffer, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/command_buffer.hpp b/source/adapters/opencl/command_buffer.hpp index 7ab145c53d..d80f29594b 100644 --- a/source/adapters/opencl/command_buffer.hpp +++ b/source/adapters/opencl/command_buffer.hpp @@ -8,8 +8,17 @@ // //===----------------------------------------------------------------------===// +#include #include -/// Stub implementation of command-buffers for OpenCL +struct ur_exp_command_buffer_handle_t_ { + ur_queue_handle_t hInternalQueue; + ur_context_handle_t hContext; + cl_command_buffer_khr CLCommandBuffer; -struct ur_exp_command_buffer_handle_t_ {}; + ur_exp_command_buffer_handle_t_(ur_queue_handle_t hQueue, + ur_context_handle_t hContext, + cl_command_buffer_khr CLCommandBuffer) + : hInternalQueue(hQueue), hContext(hContext), + CLCommandBuffer(CLCommandBuffer) {} +}; diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp index 77a51694dd..4fe8bed408 100644 --- a/source/adapters/opencl/common.cpp +++ b/source/adapters/opencl/common.cpp @@ -77,6 +77,10 @@ ur_result_t mapCLErrorToUR(cl_int Result) { return UR_RESULT_ERROR_PROGRAM_LINK_FAILURE; case CL_INVALID_ARG_INDEX: return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX; + case CL_INVALID_COMMAND_BUFFER_KHR: + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP; + case CL_INVALID_SYNC_POINT_WAIT_LIST_KHR: + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; default: return UR_RESULT_ERROR_UNKNOWN; } diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 95105b552d..0cb19694a6 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -12,7 +12,6 @@ #include #include #include -#include #include /** @@ -72,12 +71,25 @@ class OpenCLVersion { * 'OpenCL' for devices. 
*/ - std::regex Rx("OpenCL ([0-9]+)\\.([0-9]+)"); - std::smatch Match; + std::string_view Prefix = "OpenCL "; + size_t VersionBegin = Version.find_first_of(" "); + size_t VersionEnd = Version.find_first_of(" ", VersionBegin + 1); + size_t VersionSeparator = Version.find_first_of(".", VersionBegin + 1); - if (std::regex_search(Version, Match, Rx) && (Match.size() == 3)) { - OCLMajor = strtoul(Match[1].str().c_str(), nullptr, 10); - OCLMinor = strtoul(Match[2].str().c_str(), nullptr, 10); + bool HaveOCLPrefix = + std::equal(Prefix.begin(), Prefix.end(), Version.begin()); + + if (HaveOCLPrefix && VersionBegin != std::string::npos && + VersionEnd != std::string::npos && + VersionSeparator != std::string::npos) { + + std::string VersionMajor{Version.begin() + VersionBegin + 1, + Version.begin() + VersionSeparator}; + std::string VersionMinor{Version.begin() + VersionSeparator + 1, + Version.begin() + VersionEnd}; + + OCLMajor = strtoul(VersionMajor.c_str(), nullptr, 10); + OCLMinor = strtoul(VersionMinor.c_str(), nullptr, 10); if (!isValid()) { OCLMajor = OCLMinor = 0; @@ -192,6 +204,16 @@ CONSTFIX char EnqueueReadGlobalVariableName[] = // Names of host pipe functions queried from OpenCL CONSTFIX char EnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL"; CONSTFIX char EnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL"; +// Names of command buffer functions queried from OpenCL +CONSTFIX char CreateCommandBufferName[] = "clCreateCommandBufferKHR"; +CONSTFIX char RetainCommandBufferName[] = "clRetainCommandBufferKHR"; +CONSTFIX char ReleaseCommandBufferName[] = "clReleaseCommandBufferKHR"; +CONSTFIX char FinalizeCommandBufferName[] = "clFinalizeCommandBufferKHR"; +CONSTFIX char CommandNRRangeKernelName[] = "clCommandNDRangeKernelKHR"; +CONSTFIX char CommandCopyBufferName[] = "clCommandCopyBufferKHR"; +CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR"; +CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR"; +CONSTFIX char 
EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR"; #undef CONSTFIX @@ -226,6 +248,58 @@ cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, cl_uint num_events_in_waitlist, const cl_event *events_waitlist, cl_event *event); +using clCreateCommandBufferKHR_fn = CL_API_ENTRY cl_command_buffer_khr( + CL_API_CALL *)(cl_uint num_queues, const cl_command_queue *queues, + const cl_command_buffer_properties_khr *properties, + cl_int *errcode_ret); + +using clRetainCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); + +using clReleaseCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); + +using clFinalizeCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); + +using clCommandNDRangeKernelKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + const cl_ndrange_kernel_command_properties_khr *properties, + cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, + const size_t *global_work_size, const size_t *local_work_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clCommandCopyBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, + size_t size, cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clCommandCopyBufferRectKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + cl_mem src_buffer, cl_mem dst_buffer, const size_t *src_origin, + const size_t *dst_origin, const size_t *region, size_t 
src_row_pitch, + size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clCommandFillBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, + size_t size, cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clEnqueueCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_uint num_queues, cl_command_queue *queues, + cl_command_buffer_khr command_buffer, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + template struct FuncPtrCache { std::map Map; std::mutex Mutex; @@ -255,6 +329,15 @@ struct ExtFuncPtrCacheT { FuncPtrCache clEnqueueWriteHostPipeINTELCache; FuncPtrCache clSetProgramSpecializationConstantCache; + FuncPtrCache clCreateCommandBufferKHRCache; + FuncPtrCache clRetainCommandBufferKHRCache; + FuncPtrCache clReleaseCommandBufferKHRCache; + FuncPtrCache clFinalizeCommandBufferKHRCache; + FuncPtrCache clCommandNDRangeKernelKHRCache; + FuncPtrCache clCommandCopyBufferKHRCache; + FuncPtrCache clCommandCopyBufferRectKHRCache; + FuncPtrCache clCommandFillBufferKHRCache; + FuncPtrCache clEnqueueCommandBufferKHRCache; }; // A raw pointer is used here since the lifetime of this map has to be tied to // piTeardown to avoid issues with static destruction order (a user application diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 710ebcfb88..27577eab39 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -886,7 +886,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case 
UR_DEVICE_INFO_PROFILE: case UR_DEVICE_INFO_VERSION: case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: - case UR_DEVICE_INFO_EXTENSIONS: case UR_DEVICE_INFO_BUILT_IN_KERNELS: case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: @@ -908,6 +907,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_SUCCESS; } + case UR_DEVICE_INFO_EXTENSIONS: { + cl_device_id Dev = cl_adapter::cast(hDevice); + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); + + std::string SupportedExtensions(ExtStr.c_str()); + if (ExtStr.find("cl_khr_command_buffer") != std::string::npos) { + SupportedExtensions += " ur_exp_command_buffer"; + } + return ReturnValue(SupportedExtensions.c_str()); + } /* TODO: Check regularly to see if support is enabled in OpenCL. Intel GPU * EU device-specific information extensions. 
Some of the queries are * enabled by cl_intel_device_attribute_query extension, but it's not yet in diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 24d60e62f5..6830a28eec 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -25,77 +25,6 @@ cl_map_flags convertURMapFlagsToCL(ur_map_flags_t URFlags) { return CLFlags; } -ur_result_t ValidateBufferSize(ur_mem_handle_t Buffer, size_t Size, - size_t Origin) { - size_t BufferSize = 0; - CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(Buffer), - CL_MEM_SIZE, sizeof(BufferSize), - &BufferSize, nullptr)); - if (Size + Origin > BufferSize) - return UR_RESULT_ERROR_INVALID_SIZE; - return UR_RESULT_SUCCESS; -} - -ur_result_t ValidateBufferRectSize(ur_mem_handle_t Buffer, - ur_rect_region_t Region, - ur_rect_offset_t Offset) { - size_t BufferSize = 0; - CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(Buffer), - CL_MEM_SIZE, sizeof(BufferSize), - &BufferSize, nullptr)); - if (Offset.x >= BufferSize || Offset.y >= BufferSize || - Offset.z >= BufferSize) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - - if ((Region.width + Offset.x) * (Region.height + Offset.y) * - (Region.depth + Offset.z) > - BufferSize) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - - return UR_RESULT_SUCCESS; -} - -ur_result_t ValidateImageSize(ur_mem_handle_t Image, ur_rect_region_t Region, - ur_rect_offset_t Origin) { - size_t Width = 0; - CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(Image), - CL_IMAGE_WIDTH, sizeof(Width), &Width, - nullptr)); - if (Region.width + Origin.x > Width) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - - size_t Height = 0; - CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(Image), - CL_IMAGE_HEIGHT, sizeof(Height), &Height, - nullptr)); - - // CL returns a height and depth of 0 for images that don't have those - // dimensions, but regions for enqueue operations must set these to 1, so we - // need to make this adjustment to 
validate. - if (Height == 0) - Height = 1; - - if (Region.height + Origin.y > Height) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - - size_t Depth = 0; - CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(Image), - CL_IMAGE_DEPTH, sizeof(Depth), &Depth, - nullptr)); - if (Depth == 0) - Depth = 1; - - if (Region.depth + Origin.z > Depth) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - - return UR_RESULT_SUCCESS; -} - UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, @@ -141,16 +70,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - auto ClErr = clEnqueueReadBuffer( + CL_RETURN_ON_FAILURE(clEnqueueReadBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingRead, offset, size, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( @@ -158,16 +84,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - auto ClErr = clEnqueueWriteBuffer( + CL_RETURN_ON_FAILURE(clEnqueueWriteBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingWrite, offset, size, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); - } - return 
mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( @@ -182,18 +105,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( const size_t HostOrigin[3] = {hostOrigin.x, hostOrigin.y, hostOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - auto ClErr = clEnqueueReadBufferRect( + CL_RETURN_ON_FAILURE(clEnqueueReadBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingRead, BufferOrigin, HostOrigin, Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBuffer, region, bufferOrigin)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( @@ -208,18 +128,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( const size_t HostOrigin[3] = {hostOrigin.x, hostOrigin.y, hostOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - auto ClErr = clEnqueueWriteBufferRect( + CL_RETURN_ON_FAILURE(clEnqueueWriteBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingWrite, BufferOrigin, HostOrigin, Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBuffer, region, bufferOrigin)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( @@ -228,18 +145,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { 
- auto ClErr = clEnqueueCopyBuffer( + CL_RETURN_ON_FAILURE(clEnqueueCopyBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBufferSrc), cl_adapter::cast(hBufferDst), srcOffset, dstOffset, size, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateBufferSize(hBufferSrc, size, srcOffset)); - UR_RETURN_ON_FAILURE(ValidateBufferSize(hBufferDst, size, dstOffset)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( @@ -253,19 +166,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( const size_t DstOrigin[3] = {dstOrigin.x, dstOrigin.y, dstOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - auto ClErr = clEnqueueCopyBufferRect( + CL_RETURN_ON_FAILURE(clEnqueueCopyBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBufferSrc), cl_adapter::cast(hBufferDst), SrcOrigin, DstOrigin, Region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBufferSrc, region, srcOrigin)); - UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBufferDst, region, dstOrigin)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( @@ -276,16 +185,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( // CL FillBuffer only allows pattern sizes up to the largest CL type: // long16/double16 if (patternSize <= 128) { - auto ClErr = (clEnqueueFillBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, - numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - if (ClErr != CL_SUCCESS) { - 
UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); - } - return mapCLErrorToUR(ClErr); + CL_RETURN_ON_FAILURE( + clEnqueueFillBuffer(cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), pPattern, + patternSize, offset, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + return UR_RESULT_SUCCESS; } auto NumValues = size / sizeof(uint64_t); @@ -303,7 +209,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( &WriteEvent); if (ClErr != CL_SUCCESS) { delete[] HostBuffer; - UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, offset, size)); CL_RETURN_ON_FAILURE(ClErr); } @@ -338,17 +243,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( const size_t Origin[3] = {origin.x, origin.y, origin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - auto ClErr = clEnqueueReadImage( + CL_RETURN_ON_FAILURE(clEnqueueReadImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImage), blockingRead, Origin, Region, rowPitch, slicePitch, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateImageSize(hImage, region, origin)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( @@ -359,17 +261,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( const size_t Origin[3] = {origin.x, origin.y, origin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - auto ClErr = clEnqueueWriteImage( + CL_RETURN_ON_FAILURE(clEnqueueWriteImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImage), blockingWrite, Origin, Region, rowPitch, slicePitch, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateImageSize(hImage, region, 
origin)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( @@ -382,18 +281,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( const size_t DstOrigin[3] = {dstOrigin.x, dstOrigin.y, dstOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - auto ClErr = clEnqueueCopyImage( + CL_RETURN_ON_FAILURE(clEnqueueCopyImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImageSrc), cl_adapter::cast(hImageDst), SrcOrigin, DstOrigin, Region, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateImageSize(hImageSrc, region, srcOrigin)); - UR_RETURN_ON_FAILURE(ValidateImageSize(hImageDst, region, dstOrigin)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( @@ -410,9 +305,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent), &Err); - if (Err == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); - } return mapCLErrorToUR(Err); } diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index 87f1f58f1a..d180cfb097 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -220,13 +220,13 @@ urEventSetCallback(ur_event_handle_t hEvent, ur_execution_info_t execStatus, cl_int CallbackType = 0; switch (execStatus) { - case UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED: + case UR_EXECUTION_INFO_SUBMITTED: CallbackType = CL_SUBMITTED; break; - case UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING: + case UR_EXECUTION_INFO_RUNNING: CallbackType = CL_RUNNING; break; - case UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE: + case UR_EXECUTION_INFO_COMPLETE: CallbackType = CL_COMPLETE; break; default: diff --git 
a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index e7c8444a17..44157b826b 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -9,6 +9,9 @@ //===----------------------------------------------------------------------===// #include "common.hpp" +#include +#include + UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, ur_kernel_handle_t *phKernel) { diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index 9b676c0044..b42df19350 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -201,8 +201,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( if (UR_RESULT_SUCCESS != Result) { return Result; } - pDdiTable->pfnInit = urInit; - pDdiTable->pfnTearDown = urTearDown; pDdiTable->pfnAdapterGet = urAdapterGet; pDdiTable->pfnAdapterRelease = urAdapterRelease; pDdiTable->pfnAdapterRetain = urAdapterRetain; @@ -287,17 +285,17 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendMemcpyUSMExp = urCommandBufferAppendMemcpyUSMExp; - pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - urCommandBufferAppendMembufferCopyRectExp; - pDdiTable->pfnAppendMembufferReadExp = urCommandBufferAppendMembufferReadExp; - pDdiTable->pfnAppendMembufferReadRectExp = - urCommandBufferAppendMembufferReadRectExp; - pDdiTable->pfnAppendMembufferWriteExp = - urCommandBufferAppendMembufferWriteExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - urCommandBufferAppendMembufferWriteRectExp; + pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; 
+ pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + urCommandBufferAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferReadExp = urCommandBufferAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + urCommandBufferAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferWriteExp = + urCommandBufferAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + urCommandBufferAppendMemBufferWriteRectExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; @@ -381,21 +379,43 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnCooperativeKernelLaunchExp = nullptr; + + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = nullptr; + + return UR_RESULT_SUCCESS; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_program_exp_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; + ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } + pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; 
pDdiTable->pfnLinkExp = urProgramLinkExp; - return retVal; -} + return UR_RESULT_SUCCESS; +} #if defined(__cplusplus) } // extern "C" #endif diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index f240f9908b..5c6fb231da 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -3,28 +3,25 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -add_library(ur_common INTERFACE) +add_subdirectory(unified_malloc_framework) +add_subdirectory(umf_pools) + +add_ur_library(ur_common STATIC + umf_helpers.hpp + ur_pool_manager.hpp + $<$:windows/ur_lib_loader.cpp> + $<$:linux/ur_lib_loader.cpp> +) add_library(${PROJECT_NAME}::common ALIAS ur_common) -target_include_directories(ur_common INTERFACE +target_include_directories(ur_common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/include ) -add_subdirectory(unified_malloc_framework) -add_subdirectory(umf_pools) -target_link_libraries(ur_common INTERFACE unified_malloc_framework disjoint_pool ${CMAKE_DL_LIBS} ${PROJECT_NAME}::headers) - -if(WIN32) - target_sources(ur_common - INTERFACE - ${CMAKE_CURRENT_SOURCE_DIR}/windows/ur_lib_loader.cpp - umf_helpers.hpp ur_pool_manager.hpp - ) -else() - target_sources(ur_common - INTERFACE - ${CMAKE_CURRENT_SOURCE_DIR}/linux/ur_lib_loader.cpp - umf_helpers.hpp ur_pool_manager.hpp - ) -endif() +target_link_libraries(ur_common PUBLIC + unified_malloc_framework + disjoint_pool + ${CMAKE_DL_LIBS} + ${PROJECT_NAME}::headers +) diff --git a/source/common/linux/ur_lib_loader.cpp b/source/common/linux/ur_lib_loader.cpp index 1c5e0af89b..d316e48b74 100644 --- a/source/common/linux/ur_lib_loader.cpp +++ b/source/common/linux/ur_lib_loader.cpp @@ -12,12 +12,7 @@ #include "logger/ur_logger.hpp" #include "ur_lib_loader.hpp" -#if defined(SANITIZER_ANY) || defined(__APPLE__) -#define LOAD_DRIVER_LIBRARY(NAME) dlopen(NAME, RTLD_LAZY | RTLD_LOCAL) -#else -#define LOAD_DRIVER_LIBRARY(NAME) \ - dlopen(NAME, RTLD_LAZY | RTLD_LOCAL | 
RTLD_DEEPBIND) -#endif +#define DEEP_BIND_ENV "UR_ADAPTERS_DEEP_BIND" namespace ur_loader { @@ -34,8 +29,21 @@ void LibLoader::freeAdapterLibrary(HMODULE handle) { std::unique_ptr LibLoader::loadAdapterLibrary(const char *name) { - return std::unique_ptr( - LOAD_DRIVER_LIBRARY(name)); + int mode = RTLD_LAZY | RTLD_LOCAL; +#if !defined(__APPLE__) + bool deepbind = getenv_tobool(DEEP_BIND_ENV); + if (deepbind) { +#if defined(SANITIZER_ANY) + logger::warning( + "Enabling RTLD_DEEPBIND while running under a sanitizer is likely " + "to cause issues. Consider disabling {} environment variable.", + DEEP_BIND_ENV); +#endif + mode |= RTLD_DEEPBIND; + } +#endif + + return std::unique_ptr(dlopen(name, mode)); } void *LibLoader::getFunctionPtr(HMODULE handle, const char *func_name) { diff --git a/source/common/logger/ur_sinks.hpp b/source/common/logger/ur_sinks.hpp index 66322e98a6..cb8c751e4d 100644 --- a/source/common/logger/ur_sinks.hpp +++ b/source/common/logger/ur_sinks.hpp @@ -13,6 +13,7 @@ #include "ur_filesystem_resolved.hpp" #include "ur_level.hpp" +#include "ur_print.hpp" namespace logger { @@ -65,16 +66,17 @@ class Sink { if (*(++fmt) == '{') { buffer << *fmt++; } else { - std::cerr - << error_prefix - << "No arguments provided and braces not escaped!"; + std::cerr << error_prefix + << "No arguments provided and braces not escaped!" + << std::endl; } } else if (*fmt == '}') { if (*(++fmt) == '}') { buffer << *fmt++; } else { std::cerr << error_prefix - << "Closing curly brace not escaped!"; + << "Closing curly brace not escaped!" + << std::endl; } } } @@ -95,7 +97,7 @@ class Sink { buffer << *fmt++; } else if (*fmt != '}') { std::cerr << error_prefix - << "Only empty braces are allowed!"; + << "Only empty braces are allowed!" << std::endl; } else { buffer << arg; arg_printed = true; @@ -105,9 +107,17 @@ class Sink { buffer << *fmt++; } else { std::cerr << error_prefix - << "Closing curly brace not escaped!"; + << "Closing curly brace not escaped!" 
+ << std::endl; } } + + if (*fmt == '\0') { + std::cerr << error_prefix << "Too many arguments!" << std::endl; + // ignore all left arguments and finalize message + format(buffer, fmt); + return; + } } format(buffer, ++fmt, std::forward(args)...); diff --git a/source/common/umf_pools/disjoint_pool.cpp b/source/common/umf_pools/disjoint_pool.cpp index 5c6c3a852a..8d7d59a1a6 100644 --- a/source/common/umf_pools/disjoint_pool.cpp +++ b/source/common/umf_pools/disjoint_pool.cpp @@ -293,6 +293,8 @@ class DisjointPool::AllocImpl { // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. // Powers of 2 and the value halfway between the powers of 2. auto Size1 = params.MinBucketSize; + // Buckets sized smaller than the bucket default size- 8 aren't needed. + Size1 = std::max(Size1, MIN_BUCKET_DEFAULT_SIZE); auto Size2 = Size1 + Size1 / 2; for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { Buckets.push_back(std::make_unique(Size1, *this)); @@ -381,6 +383,15 @@ Slab::~Slab() { } catch (MemoryProviderError &e) { std::cout << "DisjointPool: error from memory provider: " << e.code << "\n"; + if (e.code == UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC) { + const char *message = ""; + int error = 0; + + umfMemoryProviderGetLastNativeError( + umfGetLastFailedMemoryProvider(), &message, &error); + std::cout << "Native error msg: " << message + << ", native error code: " << error << std::endl; + } } } @@ -885,6 +896,12 @@ umf_result_t DisjointPool::initialize(umf_memory_provider_handle_t *providers, if (numProviders != 1 || !providers[0]) { return UMF_RESULT_ERROR_INVALID_ARGUMENT; } + // MinBucketSize parameter must be a power of 2 for bucket sizes + // to generate correctly. 
+ if (!parameters.MinBucketSize || + !((parameters.MinBucketSize & (parameters.MinBucketSize - 1)) == 0)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } impl = std::make_unique(providers[0], parameters); return UMF_RESULT_SUCCESS; @@ -896,7 +913,7 @@ void *DisjointPool::malloc(size_t size) { // For full-slab allocations indicates auto Ptr = impl->allocate(size, FromPool); if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().name; + const auto &MT = impl->getParams().name; std::cout << "Allocated " << std::setw(8) << size << " " << MT << " bytes from " << (FromPool ? "Pool" : "Provider") << " ->" << Ptr << std::endl; @@ -921,7 +938,7 @@ void *DisjointPool::aligned_malloc(size_t size, size_t alignment) { auto Ptr = impl->allocate(size, alignment, FromPool); if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().name; + const auto &MT = impl->getParams().name; std::cout << "Allocated " << std::setw(8) << size << " " << MT << " bytes aligned at " << alignment << " from " << (FromPool ? "Pool" : "Provider") << " ->" << Ptr @@ -940,7 +957,7 @@ enum umf_result_t DisjointPool::free(void *ptr) try { impl->deallocate(ptr, ToPool); if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().name; + const auto &MT = impl->getParams().name; std::cout << "Freed " << MT << " " << ptr << " to " << (ToPool ? "Pool" : "Provider") << ", Current total pool size " diff --git a/source/common/umf_pools/disjoint_pool.hpp b/source/common/umf_pools/disjoint_pool.hpp index a8c9487ef2..2c465eff3c 100644 --- a/source/common/umf_pools/disjoint_pool.hpp +++ b/source/common/umf_pools/disjoint_pool.hpp @@ -17,6 +17,8 @@ namespace usm { +inline constexpr size_t MIN_BUCKET_DEFAULT_SIZE = 8; + // Configuration for specific USM allocator instance class DisjointPoolConfig { public: @@ -45,7 +47,8 @@ class DisjointPoolConfig { size_t Capacity = 0; // Holds the minimum bucket size valid for allocation of a memory type. 
- size_t MinBucketSize = 0; + // This value must be a power of 2. + size_t MinBucketSize = MIN_BUCKET_DEFAULT_SIZE; // Holds size of the pool managed by the allocator. size_t CurPoolSize = 0; diff --git a/source/common/unified_malloc_framework/CMakeLists.txt b/source/common/unified_malloc_framework/CMakeLists.txt index 15744605ec..a71b688b74 100644 --- a/source/common/unified_malloc_framework/CMakeLists.txt +++ b/source/common/unified_malloc_framework/CMakeLists.txt @@ -6,10 +6,17 @@ set(UMF_SOURCES src/memory_pool.c src/memory_provider.c - src/memory_tracker.cpp + src/memory_tracker.c src/memory_provider_get_last_failed.cpp + src/critnib/critnib.c ) +if(MSVC) + set(UMF_SOURCES ${UMF_SOURCES} src/utils/utils_windows.cpp src/memory_tracker_windows.cpp) +else() + set(UMF_SOURCES ${UMF_SOURCES} src/utils/utils_posix.c) +endif() + if(UMF_BUILD_SHARED_LIBRARY) message(WARNING "Unified Malloc Framework is still an early work in progress." "There are no API/ABI backward compatibility guarantees. There will be breakages." diff --git a/source/common/unified_malloc_framework/src/critnib/critnib.c b/source/common/unified_malloc_framework/src/critnib/critnib.c new file mode 100644 index 0000000000..71a589d5dc --- /dev/null +++ b/source/common/unified_malloc_framework/src/critnib/critnib.c @@ -0,0 +1,813 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +/* + * critnib.c -- implementation of critnib tree + * + * It offers identity lookup (like a hashmap) and <= lookup (like a search + * tree). Unlike some hashing algorithms (cuckoo hash, perfect hashing) the + * complexity isn't constant, but for data sizes we expect it's several + * times as fast as cuckoo, and has no "stop the world" cases that would + * cause latency (ie, better worst case behaviour). 
+ */ + +/* + * STRUCTURE DESCRIPTION + * + * Critnib is a hybrid between a radix tree and DJ Bernstein's critbit: + * it skips nodes for uninteresting radix nodes (ie, ones that would have + * exactly one child), this requires adding to every node a field that + * describes the slice (4-bit in our case) that this radix level is for. + * + * This implementation also stores each node's path (ie, bits that are + * common to every key in that subtree) -- this doesn't help with lookups + * at all (unused in == match, could be reconstructed at no cost in <= + * after first dive) but simplifies inserts and removes. If we ever want + * that piece of memory it's easy to trim it down. + */ + +/* + * CONCURRENCY ISSUES + * + * Reads are completely lock-free sync-free, but only almost wait-free: + * if for some reason a read thread gets pathologically stalled, it will + * notice the data being stale and restart the work. In usual cases, + * the structure having been modified does _not_ cause a restart. + * + * Writes could be easily made lock-free as well (with only a cmpxchg + * sync), but this leads to problems with removes. A possible solution + * would be doing removes by overwriting by NULL w/o freeing -- yet this + * would lead to the structure growing without bounds. Complex per-node + * locks would increase concurrency but they slow down individual writes + * enough that in practice a simple global write lock works faster. + * + * Removes are the only operation that can break reads. The structure + * can do local RCU well -- the problem being knowing when it's safe to + * free. Any synchronization with reads would kill their speed, thus + * instead we have a remove count. The grace period is DELETED_LIFE, + * after which any read will notice staleness and restart its work. + */ +#include +#include +#include + +#include "../utils/utils.h" +#include "critnib.h" + +/* + * A node that has been deleted is left untouched for this many delete + * cycles. 
Reads have guaranteed correctness if they took no longer than + * DELETED_LIFE concurrent deletes, otherwise they notice something is + * wrong and restart. The memory of deleted nodes is never freed to + * malloc nor their pointers lead anywhere wrong, thus a stale read will + * (temporarily) get a wrong answer but won't crash. + * + * There's no need to count writes as they never interfere with reads. + * + * Allowing stale reads (of arbitrarily old writes or of deletes less than + * DELETED_LIFE old) might sound counterintuitive, but it doesn't affect + * semantics in any way: the thread could have been stalled just after + * returning from our code. Thus, the guarantee is: the result of get() or + * find_le() is a value that was current at any point between the call + * start and end. + */ +#define DELETED_LIFE 16 + +#define SLICE 4 +#define NIB ((1ULL << SLICE) - 1) +#define SLNODES (1 << SLICE) + +typedef uintptr_t word; +typedef unsigned char sh_t; + +struct critnib_node { + /* + * path is the part of a tree that's already traversed (be it through + * explicit nodes or collapsed links) -- ie, any subtree below has all + * those bits set to this value. + * + * nib is a 4-bit slice that's an index into the node's children. + * + * shift is the length (in bits) of the part of the key below this node. 
+ * + * nib + * |XXXXXXXXXX|?|*****| + * path ^ + * +-----+ + * shift + */ + struct critnib_node *child[SLNODES]; + word path; + sh_t shift; +}; + +struct critnib_leaf { + word key; + void *value; +}; + +struct critnib { + struct critnib_node *root; + + /* pool of freed nodes: singly linked list, next at child[0] */ + struct critnib_node *deleted_node; + struct critnib_leaf *deleted_leaf; + + /* nodes removed but not yet eligible for reuse */ + struct critnib_node *pending_del_nodes[DELETED_LIFE]; + struct critnib_leaf *pending_del_leaves[DELETED_LIFE]; + + uint64_t remove_count; + + struct os_mutex_t *mutex; /* writes/removes */ +}; + +/* + * atomic load + */ +static void load(void *src, void *dst) { + util_atomic_load_acquire((word *)src, (word *)dst); +} + +static void load64(uint64_t *src, uint64_t *dst) { + util_atomic_load_acquire(src, dst); +} + +/* + * atomic store + */ +static void store(void *dst, void *src) { + util_atomic_store_release((word *)dst, (word)src); +} + +/* + * internal: is_leaf -- check tagged pointer for leafness + */ +static inline bool is_leaf(struct critnib_node *n) { return (word)n & 1; } + +/* + * internal: to_leaf -- untag a leaf pointer + */ +static inline struct critnib_leaf *to_leaf(struct critnib_node *n) { + return (void *)((word)n & ~1ULL); +} + +/* + * internal: path_mask -- return bit mask of a path above a subtree [shift] + * bits tall + */ +static inline word path_mask(sh_t shift) { return ~NIB << shift; } + +/* + * internal: slice_index -- return index of child at the given nib + */ +static inline unsigned slice_index(word key, sh_t shift) { + return (unsigned)((key >> shift) & NIB); +} + +/* + * critnib_new -- allocates a new critnib structure + */ +struct critnib *critnib_new(void) { + struct critnib *c = Zalloc(sizeof(struct critnib)); + if (!c) { + return NULL; + } + + c->mutex = util_mutex_create(); + if (!c->mutex) { + free(c); + return NULL; + } + + VALGRIND_HG_DRD_DISABLE_CHECKING(&c->root, sizeof(c->root)); + 
VALGRIND_HG_DRD_DISABLE_CHECKING(&c->remove_count, sizeof(c->remove_count)); + + return c; +} + +/* + * internal: delete_node -- recursively free (to malloc) a subtree + */ +static void delete_node(struct critnib_node *__restrict n) { + if (is_leaf(n)) { + Free(to_leaf(n)); + } else { + for (int i = 0; i < SLNODES; i++) { + if (n->child[i]) { + delete_node(n->child[i]); + } + } + + Free(n); + } +} + +/* + * critnib_delete -- destroy and free a critnib struct + */ +void critnib_delete(struct critnib *c) { + if (c->root) { + delete_node(c->root); + } + + util_mutex_destroy(c->mutex); + + for (struct critnib_node *m = c->deleted_node; m;) { + struct critnib_node *mm = m->child[0]; + Free(m); + m = mm; + } + + for (struct critnib_leaf *k = c->deleted_leaf; k;) { + struct critnib_leaf *kk = k->value; + Free(k); + k = kk; + } + + for (int i = 0; i < DELETED_LIFE; i++) { + Free(c->pending_del_nodes[i]); + Free(c->pending_del_leaves[i]); + } + + Free(c); +} + +/* + * internal: free_node -- free (to internal pool, not malloc) a node. + * + * We cannot free them to malloc as a stalled reader thread may still walk + * through such nodes; it will notice the result being bogus but only after + * completing the walk, thus we need to ensure any freed nodes still point + * to within the critnib structure. + */ +static void free_node(struct critnib *__restrict c, + struct critnib_node *__restrict n) { + if (!n) { + return; + } + + ASSERT(!is_leaf(n)); + n->child[0] = c->deleted_node; + c->deleted_node = n; +} + +/* + * internal: alloc_node -- allocate a node from our pool or from malloc + */ +static struct critnib_node *alloc_node(struct critnib *__restrict c) { + if (!c->deleted_node) { + return Malloc(sizeof(struct critnib_node)); + } + + struct critnib_node *n = c->deleted_node; + + c->deleted_node = n->child[0]; + VALGRIND_ANNOTATE_NEW_MEMORY(n, sizeof(*n)); + + return n; +} + +/* + * internal: free_leaf -- free (to internal pool, not malloc) a leaf. + * + * See free_node(). 
+ */ +static void free_leaf(struct critnib *__restrict c, + struct critnib_leaf *__restrict k) { + if (!k) { + return; + } + + k->value = c->deleted_leaf; + c->deleted_leaf = k; +} + +/* + * internal: alloc_leaf -- allocate a leaf from our pool or from malloc + */ +static struct critnib_leaf *alloc_leaf(struct critnib *__restrict c) { + if (!c->deleted_leaf) { + return Malloc(sizeof(struct critnib_leaf)); + } + + struct critnib_leaf *k = c->deleted_leaf; + + c->deleted_leaf = k->value; + VALGRIND_ANNOTATE_NEW_MEMORY(k, sizeof(*k)); + + return k; +} + +/* + * critnib_insert -- write a key:value pair to the critnib structure + * + * Returns: + * • 0 on success + * • EEXIST if such a key already exists + * • ENOMEM if we're out of memory + * + * Takes a global write lock but doesn't stall any readers. + */ +int critnib_insert(struct critnib *c, word key, void *value, int update) { + util_mutex_lock(c->mutex); + + struct critnib_leaf *k = alloc_leaf(c); + if (!k) { + util_mutex_unlock(c->mutex); + + return ENOMEM; + } + + VALGRIND_HG_DRD_DISABLE_CHECKING(k, sizeof(struct critnib_leaf)); + + k->key = key; + k->value = value; + + struct critnib_node *kn = (void *)((word)k | 1); + + struct critnib_node *n = c->root; + if (!n) { + c->root = kn; + + util_mutex_unlock(c->mutex); + + return 0; + } + + struct critnib_node **parent = &c->root; + struct critnib_node *prev = c->root; + + while (n && !is_leaf(n) && (key & path_mask(n->shift)) == n->path) { + prev = n; + parent = &n->child[slice_index(key, n->shift)]; + n = *parent; + } + + if (!n) { + n = prev; + store(&n->child[slice_index(key, n->shift)], kn); + + util_mutex_unlock(c->mutex); + + return 0; + } + + word path = is_leaf(n) ? to_leaf(n)->key : n->path; + /* Find where the path differs from our key. 
*/ + word at = path ^ key; + if (!at) { + ASSERT(is_leaf(n)); + free_leaf(c, to_leaf(kn)); + + if (update) { + to_leaf(n)->value = value; + util_mutex_unlock(c->mutex); + return 0; + } else { + util_mutex_unlock(c->mutex); + return EEXIST; + } + } + + /* and convert that to an index. */ + sh_t sh = util_mssb_index(at) & (sh_t) ~(SLICE - 1); + + struct critnib_node *m = alloc_node(c); + if (!m) { + free_leaf(c, to_leaf(kn)); + + util_mutex_unlock(c->mutex); + + return ENOMEM; + } + VALGRIND_HG_DRD_DISABLE_CHECKING(m, sizeof(struct critnib_node)); + + for (int i = 0; i < SLNODES; i++) { + m->child[i] = NULL; + } + + m->child[slice_index(key, sh)] = kn; + m->child[slice_index(path, sh)] = n; + m->shift = sh; + m->path = key & path_mask(sh); + store(parent, m); + + util_mutex_unlock(c->mutex); + + return 0; +} + +/* + * critnib_remove -- delete a key from the critnib structure, return its value + */ +void *critnib_remove(struct critnib *c, word key) { + struct critnib_leaf *k; + void *value = NULL; + + util_mutex_lock(c->mutex); + + struct critnib_node *n = c->root; + if (!n) { + goto not_found; + } + + word del = (util_atomic_increment(&c->remove_count) - 1) % DELETED_LIFE; + free_node(c, c->pending_del_nodes[del]); + free_leaf(c, c->pending_del_leaves[del]); + c->pending_del_nodes[del] = NULL; + c->pending_del_leaves[del] = NULL; + + if (is_leaf(n)) { + k = to_leaf(n); + if (k->key == key) { + store(&c->root, NULL); + goto del_leaf; + } + + goto not_found; + } + /* + * n and k are a parent:child pair (after the first iteration); k is the + * leaf that holds the key we're deleting. 
+ */ + struct critnib_node **k_parent = &c->root; + struct critnib_node **n_parent = &c->root; + struct critnib_node *kn = n; + + while (!is_leaf(kn)) { + n_parent = k_parent; + n = kn; + k_parent = &kn->child[slice_index(key, kn->shift)]; + kn = *k_parent; + + if (!kn) { + goto not_found; + } + } + + k = to_leaf(kn); + if (k->key != key) { + goto not_found; + } + + store(&n->child[slice_index(key, n->shift)], NULL); + + /* Remove the node if there's only one remaining child. */ + int ochild = -1; + for (int i = 0; i < SLNODES; i++) { + if (n->child[i]) { + if (ochild != -1) { + goto del_leaf; + } + + ochild = i; + } + } + + ASSERTne(ochild, -1); + + store(n_parent, n->child[ochild]); + c->pending_del_nodes[del] = n; + +del_leaf: + value = k->value; + c->pending_del_leaves[del] = k; + +not_found: + util_mutex_unlock(c->mutex); + return value; +} + +/* + * critnib_get -- query for a key ("==" match), returns value or NULL + * + * Doesn't need a lock but if many deletes happened while our thread was + * somehow stalled the query is restarted (as freed nodes remain unused only + * for a grace period). + * + * Counterintuitively, it's pointless to return the most current answer, + * we need only one that was valid at any point after the call started. + */ +void *critnib_get(struct critnib *c, word key) { + uint64_t wrs1, wrs2; + void *res; + + do { + struct critnib_node *n; + + load64(&c->remove_count, &wrs1); + load(&c->root, &n); + + /* + * critbit algorithm: dive into the tree, looking at nothing but + * each node's critical bit^H^H^Hnibble. This means we risk + * going wrong way if our path is missing, but that's ok... + */ + while (n && !is_leaf(n)) { + load(&n->child[slice_index(key, n->shift)], &n); + } + + /* ... as we check it at the end. */ + struct critnib_leaf *k = to_leaf(n); + res = (n && k->key == key) ? 
k->value : NULL; + load64(&c->remove_count, &wrs2); + } while (wrs1 + DELETED_LIFE <= wrs2); + + return res; +} + +/* + * internal: find_predecessor -- return the rightmost leaf in a subtree + */ +static struct critnib_leaf * +find_predecessor(struct critnib_node *__restrict n) { + while (1) { + int nib; + for (nib = NIB; nib >= 0; nib--) { + if (n->child[nib]) { + break; + } + } + + if (nib < 0) { + return NULL; + } + + n = n->child[nib]; + if (is_leaf(n)) { + return to_leaf(n); + } + } +} + +/* + * internal: find_le -- recursively search <= in a subtree + */ +static struct critnib_leaf *find_le(struct critnib_node *__restrict n, + word key) { + if (!n) { + return NULL; + } + + if (is_leaf(n)) { + struct critnib_leaf *k = to_leaf(n); + return (k->key <= key) ? k : NULL; + } + + /* + * is our key outside the subtree we're in? + * + * If we're inside, all bits above the nib will be identical; note + * that shift points at the nib's lower rather than upper edge, so it + * needs to be masked away as well. + */ + if ((key ^ n->path) >> (n->shift) & ~NIB) { + /* + * subtree is too far to the left? + * -> its rightmost value is good + */ + if (n->path < key) { + return find_predecessor(n); + } + + /* + * subtree is too far to the right? + * -> it has nothing of interest to us + */ + return NULL; + } + + unsigned nib = slice_index(key, n->shift); + /* recursive call: follow the path */ + { + struct critnib_node *m; + load(&n->child[nib], &m); + struct critnib_leaf *k = find_le(m, key); + if (k) { + return k; + } + } + + /* + * nothing in that subtree? We strayed from the path at this point, + * thus need to search every subtree to our left in this node. No + * need to dive into any but the first non-null, though. 
+ */ + for (; nib > 0; nib--) { + struct critnib_node *m; + load(&n->child[nib - 1], &m); + if (m) { + n = m; + if (is_leaf(n)) { + return to_leaf(n); + } + + return find_predecessor(n); + } + } + + return NULL; +} + +/* + * critnib_find_le -- query for a key ("<=" match), returns value or NULL + * + * Same guarantees as critnib_get(). + */ +void *critnib_find_le(struct critnib *c, word key) { + uint64_t wrs1, wrs2; + void *res; + + do { + load64(&c->remove_count, &wrs1); + struct critnib_node *n; /* avoid a subtle TOCTOU */ + load(&c->root, &n); + struct critnib_leaf *k = n ? find_le(n, key) : NULL; + res = k ? k->value : NULL; + load64(&c->remove_count, &wrs2); + } while (wrs1 + DELETED_LIFE <= wrs2); + + return res; +} + +/* + * internal: find_successor -- return the leftmost leaf in a subtree + */ +static struct critnib_leaf *find_successor(struct critnib_node *__restrict n) { + while (1) { + unsigned nib; + for (nib = 0; nib <= NIB; nib++) { + if (n->child[nib]) { + break; + } + } + + if (nib > NIB) { + return NULL; + } + + n = n->child[nib]; + if (is_leaf(n)) { + return to_leaf(n); + } + } +} + +/* + * internal: find_ge -- recursively search >= in a subtree + */ +static struct critnib_leaf *find_ge(struct critnib_node *__restrict n, + word key) { + if (!n) { + return NULL; + } + + if (is_leaf(n)) { + struct critnib_leaf *k = to_leaf(n); + return (k->key >= key) ? 
k : NULL; + } + + if ((key ^ n->path) >> (n->shift) & ~NIB) { + if (n->path > key) { + return find_successor(n); + } + + return NULL; + } + + unsigned nib = slice_index(key, n->shift); + { + struct critnib_node *m; + load(&n->child[nib], &m); + struct critnib_leaf *k = find_ge(m, key); + if (k) { + return k; + } + } + + for (; nib < NIB; nib++) { + struct critnib_node *m; + load(&n->child[nib + 1], &m); + if (m) { + n = m; + if (is_leaf(n)) { + return to_leaf(n); + } + + return find_successor(n); + } + } + + return NULL; +} + +/* + * critnib_find -- parametrized query, returns 1 if found + */ +int critnib_find(struct critnib *c, uintptr_t key, enum find_dir_t dir, + uintptr_t *rkey, void **rvalue) { + uint64_t wrs1, wrs2; + struct critnib_leaf *k; + uintptr_t _rkey; + void **_rvalue; + + /* <42 ≡ ≤41 */ + if (dir < -1) { + if (!key) { + return 0; /* no key is <0 */ + } + key--; + } else if (dir > +1) { + if (key == (uintptr_t)-1) { + return 0; /* no key is >(unsigned)∞ */ + } + key++; + } + + do { + load64(&c->remove_count, &wrs1); + struct critnib_node *n; + load(&c->root, &n); + + if (dir < 0) { + k = find_le(n, key); + } else if (dir > 0) { + k = find_ge(n, key); + } else { + while (n && !is_leaf(n)) { + load(&n->child[slice_index(key, n->shift)], &n); + } + + struct critnib_leaf *kk = to_leaf(n); + k = (n && kk->key == key) ? kk : NULL; + } + if (k) { + _rkey = k->key; + _rvalue = k->value; + } + load64(&c->remove_count, &wrs2); + } while (wrs1 + DELETED_LIFE <= wrs2); + + if (k) { + if (rkey) { + *rkey = _rkey; + } + if (rvalue) { + *rvalue = _rvalue; + } + return 1; + } + + return 0; +} + +/* + * critnib_iter -- iterator, [min..max], calls func(key, value, privdata) + * + * If func() returns non-zero, the search is aborted. 
+ */ +static int iter(struct critnib_node *__restrict n, word min, word max, + int (*func)(word key, void *value, void *privdata), + void *privdata) { + if (is_leaf(n)) { + word k = to_leaf(n)->key; + if (k >= min && k <= max) { + return func(to_leaf(n)->key, to_leaf(n)->value, privdata); + } + return 0; + } + + if (n->path > max) { + return 1; + } + if ((n->path | path_mask(n->shift)) < min) { + return 0; + } + + for (int i = 0; i < SLNODES; i++) { + struct critnib_node *__restrict m = n->child[i]; + if (m && iter(m, min, max, func, privdata)) { + return 1; + } + } + + return 0; +} + +void critnib_iter(critnib *c, uintptr_t min, uintptr_t max, + int (*func)(uintptr_t key, void *value, void *privdata), + void *privdata) { + util_mutex_lock(c->mutex); + if (c->root) { + iter(c->root, min, max, func, privdata); + } + util_mutex_unlock(c->mutex); +} diff --git a/source/common/unified_malloc_framework/src/critnib/critnib.h b/source/common/unified_malloc_framework/src/critnib/critnib.h new file mode 100644 index 0000000000..b7ce850871 --- /dev/null +++ b/source/common/unified_malloc_framework/src/critnib/critnib.h @@ -0,0 +1,48 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#ifndef CRITNIB_H +#define CRITNIB_H 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct critnib; +typedef struct critnib critnib; + +enum find_dir_t { + FIND_L = -2, + FIND_LE = -1, + FIND_EQ = 0, + FIND_GE = +1, + FIND_G = +2, +}; + +critnib *critnib_new(void); +void critnib_delete(critnib *c); + +int critnib_insert(critnib *c, uintptr_t key, void *value, int update); +void *critnib_remove(critnib *c, uintptr_t key); +void *critnib_get(critnib *c, uintptr_t key); +void *critnib_find_le(critnib *c, uintptr_t key); +int critnib_find(critnib *c, uintptr_t key, enum find_dir_t dir, + uintptr_t *rkey, void **rvalue); +void critnib_iter(critnib *c, uintptr_t min, uintptr_t max, + int (*func)(uintptr_t key, void *value, void *privdata), + void *privdata); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/source/common/unified_malloc_framework/src/memory_pool_default.c b/source/common/unified_malloc_framework/src/memory_pool_default.c index be7c4c9c57..b997b090cd 100644 --- a/source/common/unified_malloc_framework/src/memory_pool_default.c +++ b/source/common/unified_malloc_framework/src/memory_pool_default.c @@ -69,9 +69,15 @@ void umfPoolDestroy(umf_memory_pool_handle_t hPool) { free(hPool); } -enum umf_result_t umfFree(void *ptr) { return UMF_RESULT_ERROR_NOT_SUPPORTED; } +enum umf_result_t umfFree(void *ptr) { + (void)ptr; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} -umf_memory_pool_handle_t umfPoolByPtr(const void *ptr) { return NULL; } +umf_memory_pool_handle_t umfPoolByPtr(const void *ptr) { + (void)ptr; + return NULL; +} enum umf_result_t umfPoolGetMemoryProviders(umf_memory_pool_handle_t hPool, size_t numProviders, diff --git a/source/common/unified_malloc_framework/src/memory_tracker.cpp b/source/common/unified_malloc_framework/src/memory_tracker.c similarity index 69% rename from source/common/unified_malloc_framework/src/memory_tracker.cpp rename 
to source/common/unified_malloc_framework/src/memory_tracker.c index adbe2aa5e9..76b0b7b745 100644 --- a/source/common/unified_malloc_framework/src/memory_tracker.cpp +++ b/source/common/unified_malloc_framework/src/memory_tracker.c @@ -9,112 +9,99 @@ */ #include "memory_tracker.h" +#include "critnib/critnib.h" + +#include #include #include -#include -#include -#include -#include +#include +#include #include -#ifdef _WIN32 -#include -#endif - -// TODO: reimplement in C and optimize... -struct umf_memory_tracker_t { - enum umf_result_t add(void *pool, const void *ptr, size_t size) { - std::unique_lock lock(mtx); +#if !defined(_WIN32) +critnib *TRACKER = NULL; +void __attribute__((constructor)) createLibTracker(void) { + TRACKER = critnib_new(); +} +void __attribute__((destructor)) deleteLibTracker(void) { + critnib_delete(TRACKER); +} - if (size == 0) { - return UMF_RESULT_SUCCESS; - } +umf_memory_tracker_handle_t umfMemoryTrackerGet(void) { + return (umf_memory_tracker_handle_t)TRACKER; +} +#endif - auto ret = - map.try_emplace(reinterpret_cast(ptr), size, pool); - return ret.second ? 
UMF_RESULT_SUCCESS : UMF_RESULT_ERROR_UNKNOWN; - } +struct tracker_value_t { + umf_memory_pool_handle_t pool; + size_t size; +}; - enum umf_result_t remove(const void *ptr, size_t size) { - std::unique_lock lock(mtx); +static enum umf_result_t +umfMemoryTrackerAdd(umf_memory_tracker_handle_t hTracker, + umf_memory_pool_handle_t pool, const void *ptr, + size_t size) { + assert(ptr); - map.erase(reinterpret_cast(ptr)); + struct tracker_value_t *value = + (struct tracker_value_t *)malloc(sizeof(struct tracker_value_t)); + value->pool = pool; + value->size = size; - // TODO: handle removing part of the range - (void)size; + int ret = critnib_insert((critnib *)hTracker, (uintptr_t)ptr, value, 0); + if (ret == 0) { return UMF_RESULT_SUCCESS; } - void *find(const void *ptr) { - std::shared_lock lock(mtx); - - auto intptr = reinterpret_cast(ptr); - auto it = map.upper_bound(intptr); - if (it == map.begin()) { - return nullptr; - } - - --it; - - auto address = it->first; - auto size = it->second.first; - auto pool = it->second.second; - - if (intptr >= address && intptr < address + size) { - return pool; - } + free(value); - return nullptr; + if (ret == ENOMEM) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - private: - std::shared_mutex mtx; - std::map> map; -}; - -static enum umf_result_t -umfMemoryTrackerAdd(umf_memory_tracker_handle_t hTracker, void *pool, - const void *ptr, size_t size) { - return hTracker->add(pool, ptr, size); + // This should not happen + // TODO: add logging here + return UMF_RESULT_ERROR_UNKNOWN; } static enum umf_result_t umfMemoryTrackerRemove(umf_memory_tracker_handle_t hTracker, const void *ptr, size_t size) { - return hTracker->remove(ptr, size); -} + assert(ptr); + + // TODO: there is no support for removing partial ranges (or multiple entries + // in a single remove call) yet. + // Every umfMemoryTrackerAdd(..., ptr, ...) should have a corresponding + // umfMemoryTrackerRemove call with the same ptr value. 
+ (void)size; + + void *value = critnib_remove((critnib *)hTracker, (uintptr_t)ptr); + if (!value) { + // This should not happen + // TODO: add logging here + return UMF_RESULT_ERROR_UNKNOWN; + } -extern "C" { + free(value); -#if defined(_WIN32) && defined(UMF_SHARED_LIBRARY) -umf_memory_tracker_t *tracker = nullptr; -BOOL APIENTRY DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { - if (fdwReason == DLL_PROCESS_DETACH) { - delete tracker; - } else if (fdwReason == DLL_PROCESS_ATTACH) { - tracker = new umf_memory_tracker_t; - } - return TRUE; -} -#elif defined(_WIN32) -umf_memory_tracker_t trackerInstance; -umf_memory_tracker_t *tracker = &trackerInstance; -#else -umf_memory_tracker_t *tracker = nullptr; -void __attribute__((constructor)) createLibTracker() { - tracker = new umf_memory_tracker_t; + return UMF_RESULT_SUCCESS; } -void __attribute__((destructor)) deleteLibTracker() { delete tracker; } -#endif +umf_memory_pool_handle_t +umfMemoryTrackerGetPool(umf_memory_tracker_handle_t hTracker, const void *ptr) { + assert(ptr); -umf_memory_tracker_handle_t umfMemoryTrackerGet(void) { return tracker; } + uintptr_t rkey; + struct tracker_value_t *rvalue; + int found = critnib_find((critnib *)hTracker, (uintptr_t)ptr, FIND_LE, + (void *)&rkey, (void **)&rvalue); + if (!found) { + return NULL; + } -void *umfMemoryTrackerGetPool(umf_memory_tracker_handle_t hTracker, - const void *ptr) { - return hTracker->find(ptr); + return (rkey + rvalue->size >= (uintptr_t)ptr) ? rvalue->pool : NULL; } struct umf_tracking_memory_provider_t { @@ -136,7 +123,7 @@ static enum umf_result_t trackingAlloc(void *hProvider, size_t size, } ret = umfMemoryProviderAlloc(p->hUpstream, size, alignment, ptr); - if (ret != UMF_RESULT_SUCCESS) { + if (ret != UMF_RESULT_SUCCESS || !*ptr) { return ret; } @@ -159,9 +146,11 @@ static enum umf_result_t trackingFree(void *hProvider, void *ptr, size_t size) { // to avoid a race condition. 
If the order would be different, other thread // could allocate the memory at address `ptr` before a call to umfMemoryTrackerRemove // resulting in inconsistent state. - ret = umfMemoryTrackerRemove(p->hTracker, ptr, size); - if (ret != UMF_RESULT_SUCCESS) { - return ret; + if (ptr) { + ret = umfMemoryTrackerRemove(p->hTracker, ptr, size); + if (ret != UMF_RESULT_SUCCESS) { + return ret; + } } ret = umfMemoryProviderFree(p->hUpstream, ptr, size); @@ -267,4 +256,3 @@ void umfTrackingMemoryProviderGetUpstreamProvider( (umf_tracking_memory_provider_t *)hTrackingProvider; *hUpstream = p->hUpstream; } -} diff --git a/source/common/unified_malloc_framework/src/memory_tracker.h b/source/common/unified_malloc_framework/src/memory_tracker.h index 43a95cf0cd..c16844928e 100644 --- a/source/common/unified_malloc_framework/src/memory_tracker.h +++ b/source/common/unified_malloc_framework/src/memory_tracker.h @@ -22,8 +22,8 @@ extern "C" { typedef struct umf_memory_tracker_t *umf_memory_tracker_handle_t; umf_memory_tracker_handle_t umfMemoryTrackerGet(void); -void *umfMemoryTrackerGetPool(umf_memory_tracker_handle_t hTracker, - const void *ptr); +umf_memory_pool_handle_t +umfMemoryTrackerGetPool(umf_memory_tracker_handle_t hTracker, const void *ptr); // Creates a memory provider that tracks each allocation/deallocation through umf_memory_tracker_handle_t and // forwards all requests to hUpstream memory Provider. hUpstream lifetime should be managed by the user of this function. diff --git a/source/common/unified_malloc_framework/src/memory_tracker_windows.cpp b/source/common/unified_malloc_framework/src/memory_tracker_windows.cpp new file mode 100644 index 0000000000..b5545f3490 --- /dev/null +++ b/source/common/unified_malloc_framework/src/memory_tracker_windows.cpp @@ -0,0 +1,37 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include "critnib/critnib.h" +#include "memory_tracker.h" + +#include +#if defined(UMF_SHARED_LIBRARY) +critnib *TRACKER = NULL; +BOOL APIENTRY DllMain(HINSTANCE, DWORD fdwReason, LPVOID lpvReserved) { + if (fdwReason == DLL_PROCESS_DETACH) { + critnib_delete(TRACKER); + } else if (fdwReason == DLL_PROCESS_ATTACH) { + TRACKER = critnib_new(); + } + return TRUE; +} +#else +struct tracker_t { + tracker_t() { map = critnib_new(); } + ~tracker_t() { critnib_delete(map); } + critnib *map; +}; +tracker_t TRACKER_INSTANCE; +critnib *TRACKER = TRACKER_INSTANCE.map; +#endif + +umf_memory_tracker_handle_t umfMemoryTrackerGet(void) { + return (umf_memory_tracker_handle_t)TRACKER; +} diff --git a/source/common/unified_malloc_framework/src/utils/utils.h b/source/common/unified_malloc_framework/src/utils/utils.h new file mode 100644 index 0000000000..32d499787b --- /dev/null +++ b/source/common/unified_malloc_framework/src/utils/utils.h @@ -0,0 +1,110 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include +#include +#include +#if defined(_WIN32) +#include +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +struct os_mutex_t; + +struct os_mutex_t *util_mutex_create(void); +void util_mutex_destroy(struct os_mutex_t *mutex); +int util_mutex_lock(struct os_mutex_t *mutex); +int util_mutex_unlock(struct os_mutex_t *mutex); + +#if defined(_WIN32) +static __inline unsigned char util_lssb_index(long long value) { + unsigned long ret; + _BitScanForward64(&ret, value); + return (unsigned char)ret; +} +static __inline unsigned char util_mssb_index(long long value) { + unsigned long ret; + _BitScanReverse64(&ret, value); + return (unsigned char)ret; +} + +// There is no good way to do atomic_load on windows... +#define util_atomic_load_acquire(object, dest) \ + do { \ + *dest = InterlockedOr64Acquire((LONG64 volatile *)object, 0); \ + } while (0) + +#define util_atomic_store_release(object, desired) \ + InterlockedExchange64((LONG64 volatile *)object, (LONG64)desired) +#define util_atomic_increment(object) \ + InterlockedIncrement64((LONG64 volatile *)object) +#else +#define util_lssb_index(x) ((unsigned char)__builtin_ctzll(x)) +#define util_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x))) +#define util_atomic_load_acquire(object, dest) \ + __atomic_load(object, dest, memory_order_acquire) +#define util_atomic_store_release(object, desired) \ + __atomic_store_n(object, desired, memory_order_release) +#define util_atomic_increment(object) \ + __atomic_add_fetch(object, 1, __ATOMIC_ACQ_REL) +#endif + +#define Malloc malloc +#define Free free + +static inline void *Zalloc(size_t s) { + void *m = Malloc(s); + if (m) { + memset(m, 0, s); + } + return m; +} + +#define NOFUNCTION \ + do { \ + } while (0) +#define VALGRIND_ANNOTATE_NEW_MEMORY(p, s) NOFUNCTION +#define VALGRIND_HG_DRD_DISABLE_CHECKING(p, s) NOFUNCTION + +#ifdef NDEBUG +#define ASSERT(x) 
NOFUNCTION +#define ASSERTne(x, y) ASSERT(x != y) +#else +#define ASSERT(x) \ + do \ + if (!(x)) { \ + fprintf(stderr, \ + "Assertion failed: " #x " at " __FILE__ " line %d.\n", \ + __LINE__); \ + abort(); \ + } \ + while (0) +#define ASSERTne(x, y) \ + do { \ + long X = (x); \ + long Y = (y); \ + if (X == Y) { \ + fprintf(stderr, \ + "Assertion failed: " #x " != " #y \ + ", both are %ld, at " __FILE__ " line %d.\n", \ + X, __LINE__); \ + abort(); \ + } \ + } while (0) +#endif + +#ifdef __cplusplus +} +#endif diff --git a/source/common/unified_malloc_framework/src/utils/utils_posix.c b/source/common/unified_malloc_framework/src/utils/utils_posix.c new file mode 100644 index 0000000000..d03bb366a1 --- /dev/null +++ b/source/common/unified_malloc_framework/src/utils/utils_posix.c @@ -0,0 +1,35 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include +#include + +#include "utils.h" + +struct os_mutex_t *util_mutex_create(void) { + pthread_mutex_t *mutex = (pthread_mutex_t *)malloc(sizeof(pthread_mutex_t)); + int ret = pthread_mutex_init(mutex, NULL); + return ret == 0 ? 
((struct os_mutex_t *)mutex) : NULL; +} + +void util_mutex_destroy(struct os_mutex_t *m) { + pthread_mutex_t *mutex = (pthread_mutex_t *)m; + int ret = pthread_mutex_destroy(mutex); + (void)ret; // TODO: add logging + free(m); +} + +int util_mutex_lock(struct os_mutex_t *m) { + return pthread_mutex_lock((pthread_mutex_t *)m); +} + +int util_mutex_unlock(struct os_mutex_t *m) { + return pthread_mutex_unlock((pthread_mutex_t *)m); +} diff --git a/source/common/unified_malloc_framework/src/utils/utils_windows.cpp b/source/common/unified_malloc_framework/src/utils/utils_windows.cpp new file mode 100644 index 0000000000..b5db557c77 --- /dev/null +++ b/source/common/unified_malloc_framework/src/utils/utils_windows.cpp @@ -0,0 +1,33 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include + +#include "utils.h" + +struct os_mutex_t *util_mutex_create(void) { + return reinterpret_cast(new std::mutex); +} + +void util_mutex_destroy(struct os_mutex_t *mutex) { + delete reinterpret_cast(mutex); +} + +int util_mutex_lock(struct os_mutex_t *mutex) try { + reinterpret_cast(mutex)->lock(); + return 0; +} catch (std::system_error &err) { + return err.code().value(); +} + +int util_mutex_unlock(struct os_mutex_t *mutex) { + reinterpret_cast(mutex)->unlock(); + return 0; +} diff --git a/source/common/ur_pool_manager.hpp b/source/common/ur_pool_manager.hpp index c4da5d149f..2215bd0575 100644 --- a/source/common/ur_pool_manager.hpp +++ b/source/common/ur_pool_manager.hpp @@ -11,11 +11,17 @@ #ifndef USM_POOL_MANAGER_HPP #define USM_POOL_MANAGER_HPP 1 +#include "logger/ur_logger.hpp" +#include "umf_helpers.hpp" +#include "umf_pools/disjoint_pool.hpp" #include "ur_api.h" -#include "ur_pool_manager.hpp" #include "ur_util.hpp" +#include +#include + #include +#include #include namespace usm { @@ -29,8 
+35,9 @@ struct pool_descriptor { ur_usm_type_t type; bool deviceReadOnly; - static bool equal(const pool_descriptor &lhs, const pool_descriptor &rhs); - static std::size_t hash(const pool_descriptor &desc); + bool operator==(const pool_descriptor &other) const; + friend std::ostream &operator<<(std::ostream &os, + const pool_descriptor &desc); static std::pair> create(ur_usm_pool_handle_t poolHandle, ur_context_handle_t hContext); }; @@ -45,8 +52,8 @@ urGetSubDevices(ur_device_handle_t hDevice) { } ur_device_partition_property_t prop; - prop.type = UR_DEVICE_PARTITION_EQUALLY; - prop.value.equally = nComputeUnits; + prop.type = UR_DEVICE_PARTITION_BY_CSLICE; + prop.value.affinity_domain = 0; ur_device_partition_properties_t properties{ UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, @@ -75,10 +82,10 @@ urGetSubDevices(ur_device_handle_t hDevice) { inline std::pair> urGetAllDevicesAndSubDevices(ur_context_handle_t hContext) { - size_t deviceCount; + size_t deviceCount = 0; auto ret = urContextGetInfo(hContext, UR_CONTEXT_INFO_NUM_DEVICES, sizeof(deviceCount), &deviceCount, nullptr); - if (ret != UR_RESULT_SUCCESS) { + if (ret != UR_RESULT_SUCCESS || deviceCount == 0) { return {ret, {}}; } @@ -110,6 +117,11 @@ urGetAllDevicesAndSubDevices(ur_context_handle_t hContext) { for (size_t i = 0; i < deviceCount; i++) { ret = addPoolsForDevicesRec(devices[i]); if (ret != UR_RESULT_SUCCESS) { + if (ret == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + // Return main devices when sub-devices are unsupported. 
+ return {ret, std::move(devices)}; + } + return {ret, {}}; } } @@ -122,22 +134,28 @@ isSharedAllocationReadOnlyOnDevice(const pool_descriptor &desc) { return desc.type == UR_USM_TYPE_SHARED && desc.deviceReadOnly; } -inline bool pool_descriptor::equal(const pool_descriptor &lhs, - const pool_descriptor &rhs) { - ur_native_handle_t lhsNative, rhsNative; +inline bool pool_descriptor::operator==(const pool_descriptor &other) const { + const pool_descriptor &lhs = *this; + const pool_descriptor &rhs = other; + ur_native_handle_t lhsNative = nullptr, rhsNative = nullptr; // We want to share a memory pool for sub-devices and sub-sub devices. // Sub-devices and sub-sub-devices might be represented by different ur_device_handle_t but // they share the same native_handle_t (which is used by UMF provider). // Ref: https://github.com/intel/llvm/commit/86511c5dc84b5781dcfd828caadcb5cac157eae1 // TODO: is this L0 specific? - auto ret = urDeviceGetNativeHandle(lhs.hDevice, &lhsNative); - if (ret != UR_RESULT_SUCCESS) { - throw ret; + if (lhs.hDevice) { + auto ret = urDeviceGetNativeHandle(lhs.hDevice, &lhsNative); + if (ret != UR_RESULT_SUCCESS) { + throw ret; + } } - ret = urDeviceGetNativeHandle(rhs.hDevice, &rhsNative); - if (ret != UR_RESULT_SUCCESS) { - throw ret; + + if (rhs.hDevice) { + auto ret = urDeviceGetNativeHandle(rhs.hDevice, &rhsNative); + if (ret != UR_RESULT_SUCCESS) { + throw ret; + } } return lhsNative == rhsNative && lhs.type == rhs.type && @@ -146,16 +164,12 @@ inline bool pool_descriptor::equal(const pool_descriptor &lhs, lhs.poolHandle == rhs.poolHandle; } -inline std::size_t pool_descriptor::hash(const pool_descriptor &desc) { - ur_native_handle_t native; - auto ret = urDeviceGetNativeHandle(desc.hDevice, &native); - if (ret != UR_RESULT_SUCCESS) { - throw ret; - } - - return combine_hashes(0, desc.type, native, - isSharedAllocationReadOnlyOnDevice(desc), - desc.poolHandle); +inline std::ostream &operator<<(std::ostream &os, const pool_descriptor &desc) 
{ + os << "pool handle: " << desc.poolHandle + << " context handle: " << desc.hContext + << " device handle: " << desc.hDevice << " memory type: " << desc.type + << " is read only: " << desc.deviceReadOnly; + return os; } inline std::pair> @@ -177,6 +191,7 @@ pool_descriptor::create(ur_usm_pool_handle_t poolHandle, pool_descriptor &desc = descriptors.emplace_back(); desc.poolHandle = poolHandle; desc.hContext = hContext; + desc.hDevice = device; desc.type = UR_USM_TYPE_DEVICE; } { @@ -200,6 +215,69 @@ pool_descriptor::create(ur_usm_pool_handle_t poolHandle, return {ret, descriptors}; } +template struct pool_manager { + private: + using desc_to_pool_map_t = std::unordered_map; + + desc_to_pool_map_t descToPoolMap; + + public: + static std::pair + create(desc_to_pool_map_t descToHandleMap = {}) { + auto manager = pool_manager(); + + for (auto &[desc, hPool] : descToHandleMap) { + auto ret = manager.addPool(desc, hPool); + if (ret != UR_RESULT_SUCCESS) { + return {ret, pool_manager()}; + } + } + + return {UR_RESULT_SUCCESS, std::move(manager)}; + } + + ur_result_t addPool(const D &desc, + umf::pool_unique_handle_t &hPool) noexcept { + if (!descToPoolMap.try_emplace(desc, std::move(hPool)).second) { + logger::error("Pool for pool descriptor: {}, already exists", desc); + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + + return UR_RESULT_SUCCESS; + } + + std::optional getPool(const D &desc) noexcept { + auto it = descToPoolMap.find(desc); + if (it == descToPoolMap.end()) { + logger::error("Pool descriptor doesn't match any existing pool: {}", + desc); + return std::nullopt; + } + + return it->second.get(); + } +}; + } // namespace usm +namespace std { +/// @brief hash specialization for usm::pool_descriptor +template <> struct hash { + inline size_t operator()(const usm::pool_descriptor &desc) const { + ur_native_handle_t native = nullptr; + if (desc.hDevice) { + auto ret = urDeviceGetNativeHandle(desc.hDevice, &native); + if (ret != UR_RESULT_SUCCESS) { + throw ret; + } 
+ } + + return combine_hashes(0, desc.type, native, + isSharedAllocationReadOnlyOnDevice(desc), + desc.poolHandle); + } +}; + +} // namespace std + #endif /* USM_POOL_MANAGER_HPP */ diff --git a/source/common/ur_singleton.hpp b/source/common/ur_singleton.hpp index d757bb197c..6440e3ac7f 100644 --- a/source/common/ur_singleton.hpp +++ b/source/common/ur_singleton.hpp @@ -31,7 +31,8 @@ template class singleton_factory_t { ////////////////////////////////////////////////////////////////////////// /// extract the key from parameter list and if necessary, convert type - template key_t getKey(key_tn key, Ts &&...params) { + template + key_t getKey(key_tn key, [[maybe_unused]] Ts &&...params) { return reinterpret_cast(key); } diff --git a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt index db796612ea..d4f5bc73a5 100644 --- a/source/loader/CMakeLists.txt +++ b/source/loader/CMakeLists.txt @@ -88,6 +88,7 @@ target_sources(ur_loader ${CMAKE_CURRENT_SOURCE_DIR}/ur_libddi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_lib.hpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_lib.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ur_codeloc.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/validation/ur_valddi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/validation/ur_validation_layer.cpp ) diff --git a/source/loader/layers/tracing/ur_tracing_layer.cpp b/source/loader/layers/tracing/ur_tracing_layer.cpp index b022ae831f..dd36b286f2 100644 --- a/source/loader/layers/tracing/ur_tracing_layer.cpp +++ b/source/loader/layers/tracing/ur_tracing_layer.cpp @@ -14,6 +14,7 @@ #include "ur_util.hpp" #include "xpti/xpti_data_types.h" #include "xpti/xpti_trace_framework.h" +#include #include namespace ur_tracing_layer { @@ -23,6 +24,8 @@ constexpr auto CALL_STREAM_NAME = "ur"; constexpr auto STREAM_VER_MAJOR = UR_MAJOR_VERSION(UR_API_VERSION_CURRENT); constexpr auto STREAM_VER_MINOR = UR_MINOR_VERSION(UR_API_VERSION_CURRENT); +static thread_local xpti_td *activeEvent; + 
/////////////////////////////////////////////////////////////////////////////// context_t::context_t() { xptiFrameworkInitialize(); @@ -39,11 +42,21 @@ bool context_t::isAvailable() const { return xptiTraceEnabled(); } void context_t::notify(uint16_t trace_type, uint32_t id, const char *name, void *args, ur_result_t *resultp, uint64_t instance) { xpti::function_with_args_t payload{id, name, args, resultp, nullptr}; - xptiNotifySubscribers(call_stream_id, trace_type, nullptr, nullptr, + xptiNotifySubscribers(call_stream_id, trace_type, nullptr, activeEvent, instance, &payload); } uint64_t context_t::notify_begin(uint32_t id, const char *name, void *args) { + if (auto loc = codelocData.get_codeloc()) { + xpti::payload_t payload = + xpti::payload_t(loc->functionName, loc->sourceFile, loc->lineNumber, + loc->columnNumber, nullptr); + uint64_t InstanceNumber{}; + activeEvent = xptiMakeEvent("Unified Runtime call", &payload, + xpti::trace_graph_event, xpti_at::active, + &InstanceNumber); + } + uint64_t instance = xptiGetUniqueId(); notify((uint16_t)xpti::trace_point_type_t::function_with_args_begin, id, name, args, nullptr, instance); diff --git a/source/loader/layers/tracing/ur_tracing_layer.hpp b/source/loader/layers/tracing/ur_tracing_layer.hpp index b00d12d301..ddda493c05 100644 --- a/source/loader/layers/tracing/ur_tracing_layer.hpp +++ b/source/loader/layers/tracing/ur_tracing_layer.hpp @@ -24,6 +24,7 @@ namespace ur_tracing_layer { class __urdlllocal context_t : public proxy_layer_context_t { public: ur_dditable_t urDdiTable = {}; + codeloc_data codelocData; context_t(); ~context_t(); @@ -32,7 +33,9 @@ class __urdlllocal context_t : public proxy_layer_context_t { std::vector getNames() const override { return {name}; } ur_result_t init(ur_dditable_t *dditable, - const std::set &enabledLayerNames) override; + const std::set &enabledLayerNames, + codeloc_data codelocData) override; + ur_result_t tearDown() override { return UR_RESULT_SUCCESS; } uint64_t 
notify_begin(uint32_t id, const char *name, void *args); void notify_end(uint32_t id, const char *name, void *args, ur_result_t *resultp, uint64_t instance); diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index beb32b715d..402b64d638 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -15,54 +15,6 @@ #include namespace ur_tracing_layer { -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urInit -__urdlllocal ur_result_t UR_APICALL urInit( - ur_device_init_flags_t device_flags, ///< [in] device initialization flags. - ///< must be 0 (default) or a combination of ::ur_device_init_flag_t. - ur_loader_config_handle_t - hLoaderConfig ///< [in][optional] Handle of loader config handle. -) { - auto pfnInit = context.urDdiTable.Global.pfnInit; - - if (nullptr == pfnInit) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - } - - ur_init_params_t params = {&device_flags, &hLoaderConfig}; - uint64_t instance = - context.notify_begin(UR_FUNCTION_INIT, "urInit", ¶ms); - - ur_result_t result = pfnInit(device_flags, hLoaderConfig); - - context.notify_end(UR_FUNCTION_INIT, "urInit", ¶ms, &result, instance); - - return result; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTearDown -__urdlllocal ur_result_t UR_APICALL urTearDown( - void *pParams ///< [in] pointer to tear down parameters -) { - auto pfnTearDown = context.urDdiTable.Global.pfnTearDown; - - if (nullptr == pfnTearDown) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - } - - ur_tear_down_params_t params = {&pParams}; - uint64_t instance = - context.notify_begin(UR_FUNCTION_TEAR_DOWN, "urTearDown", ¶ms); - - ur_result_t result = pfnTearDown(pParams); - - context.notify_end(UR_FUNCTION_TEAR_DOWN, "urTearDown", ¶ms, &result, - instance); - - return result; -} - 
/////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urAdapterGet __urdlllocal ur_result_t UR_APICALL urAdapterGet( @@ -405,8 +357,8 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGet( ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices. - ///< If phDevices in not NULL then NumEntries should be greater than zero, - ///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE, + ///< If phDevices is not NULL, then NumEntries should be greater than zero. + ///< Otherwise ::UR_RESULT_ERROR_INVALID_SIZE ///< will be returned. ur_device_handle_t * phDevices, ///< [out][optional][range(0, NumEntries)] array of handle of devices. @@ -3373,7 +3325,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// @brief Intercept function for urEnqueueMemBufferRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -3415,7 +3368,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( /// @brief Intercept function for urEnqueueMemBufferWrite __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -3460,7 +3414,8 @@ __urdlllocal ur_result_t 
UR_APICALL urEnqueueMemBufferWrite( /// @brief Intercept function for urEnqueueMemBufferReadRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -3527,7 +3482,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// @brief Intercept function for urEnqueueMemBufferWriteRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer @@ -3597,9 +3553,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the src buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object size_t srcOffset, ///< [in] offset into hBufferSrc to begin 
copying from size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into size_t size, ///< [in] size in bytes of data being copied @@ -3638,9 +3596,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopyRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the source buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOrigin, region)] handle of the source buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the dest buffer object ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer ur_rect_region_t @@ -3694,10 +3654,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// @brief Intercept function for urEnqueueMemBufferFill __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object - const void *pPattern, ///< [in] pointer to the fill pattern - size_t patternSize, ///< [in] size in bytes of the pattern - size_t offset, ///< [in] offset into the buffer + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + const void *pPattern, ///< [in] pointer to the fill pattern + size_t patternSize, ///< [in] size in bytes of the pattern + size_t offset, ///< [in] offset into the buffer size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t numEventsInWaitList, ///< [in] size of the event wait list const 
ur_event_handle_t * @@ -3741,7 +3702,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( /// @brief Intercept function for urEnqueueMemImageRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image @@ -3789,7 +3751,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( /// @brief Intercept function for urEnqueueMemImageWrite __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t @@ -3837,9 +3800,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemImageCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImageSrc, ///< [in] handle of the src image object - ur_mem_handle_t hImageDst, ///< [in] handle of the dest image object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hImageSrc, ///< [in][bounds(srcOrigin, region)] handle of the src image object + ur_mem_handle_t + hImageDst, ///< [in][bounds(dstOrigin, region)] handle of the dest image object ur_rect_offset_t srcOrigin, ///< [in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D ///< image @@ -3885,7 +3850,8 @@ 
__urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( /// @brief Intercept function for urEnqueueMemBufferMap __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t mapFlags, ///< [in] flags for read, write, readwrite mapping size_t offset, ///< [in] offset in bytes of the buffer region being mapped @@ -3968,7 +3934,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap( /// @brief Intercept function for urEnqueueUSMFill __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - void *ptr, ///< [in] pointer to USM memory object + void *pMem, ///< [in][bounds(0, size)] pointer to USM memory object size_t patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. 
@@ -3993,14 +3959,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( } ur_enqueue_usm_fill_params_t params = { - &hQueue, &ptr, &patternSize, + &hQueue, &pMem, &patternSize, &pPattern, &size, &numEventsInWaitList, &phEventWaitList, &phEvent}; uint64_t instance = context.notify_begin(UR_FUNCTION_ENQUEUE_USM_FILL, "urEnqueueUSMFill", ¶ms); ur_result_t result = - pfnUSMFill(hQueue, ptr, patternSize, pPattern, size, + pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, phEvent); context.notify_end(UR_FUNCTION_ENQUEUE_USM_FILL, "urEnqueueUSMFill", @@ -4014,9 +3980,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object bool blocking, ///< [in] blocking or non-blocking copy - void *pDst, ///< [in] pointer to the destination USM memory object - const void *pSrc, ///< [in] pointer to the source USM memory object - size_t size, ///< [in] size in bytes to be copied + void * + pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void * + pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object + size_t size, ///< [in] size in bytes to be copied uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -4052,9 +4020,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMPrefetch __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be fetched + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< 
[in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -4091,9 +4060,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMAdvise __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be advised + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t advice, ///< [in] USM memory advice ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular @@ -4122,7 +4092,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( /// @brief Intercept function for urEnqueueUSMFill2D __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. - void *pMem, ///< [in] pointer to memory to be filled. + void * + pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. size_t pitch, ///< [in] the total width of the destination memory including padding. size_t @@ -4172,10 +4143,13 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. bool blocking, ///< [in] indicates if this operation should block the host. - void *pDst, ///< [in] pointer to memory where data will be copied. 
+ void * + pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. size_t dstPitch, ///< [in] the total width of the source memory including padding. - const void *pSrc, ///< [in] pointer to memory to be copied. + const void * + pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. size_t srcPitch, ///< [in] the total width of the source memory including padding. size_t width, ///< [in] the width in bytes of each row to be copied. @@ -4386,7 +4360,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( ///< events that must be complete before the host pipe write. ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. ur_event_handle_t * - phEvent ///< [out] returns an event object that identifies this write command + phEvent ///< [out][optional] returns an event object that identifies this write command ///< and can be used to query or queue a wait for this command to complete. ) { auto pfnWriteHostPipe = context.urDdiTable.Enqueue.pfnWriteHostPipe; @@ -5232,8 +5206,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMemcpyUSMExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +/// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. 
@@ -5246,34 +5220,77 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMemcpyUSMExp = - context.urDdiTable.CommandBufferExp.pfnAppendMemcpyUSMExp; + auto pfnAppendUSMMemcpyExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; - if (nullptr == pfnAppendMemcpyUSMExp) { + if (nullptr == pfnAppendUSMMemcpyExp) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_command_buffer_append_memcpy_usm_exp_params_t params = { + ur_command_buffer_append_usm_memcpy_exp_params_t params = { &hCommandBuffer, &pDst, &pSrc, &size, &numSyncPointsInWaitList, &pSyncPointWaitList, &pSyncPoint}; uint64_t instance = - context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP, - "urCommandBufferAppendMemcpyUSMExp", ¶ms); + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP, + "urCommandBufferAppendUSMMemcpyExp", ¶ms); - ur_result_t result = pfnAppendMemcpyUSMExp(hCommandBuffer, pDst, pSrc, size, + ur_result_t result = pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); - context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP, - "urCommandBufferAppendMemcpyUSMExp", ¶ms, &result, + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP, + "urCommandBufferAppendUSMMemcpyExp", ¶ms, &result, instance); return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferCopyExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +/// @brief Intercept function for urCommandBufferAppendUSMFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. 
+ void *pMemory, ///< [in] pointer to USM allocated memory to fill. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + auto pfnAppendUSMFillExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMFillExp; + + if (nullptr == pfnAppendUSMFillExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_append_usm_fill_exp_params_t params = { + &hCommandBuffer, &pMemory, &pPattern, + &patternSize, &size, &numSyncPointsInWaitList, + &pSyncPointWaitList, &pSyncPoint}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP, + "urCommandBufferAppendUSMFillExp", ¶ms); + + ur_result_t result = pfnAppendUSMFillExp( + hCommandBuffer, pMemory, pPattern, patternSize, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP, + "urCommandBufferAppendUSMFillExp", ¶ms, &result, + instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -5288,14 +5305,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferCopyExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferCopyExp; + auto pfnAppendMemBufferCopyExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; - if (nullptr == pfnAppendMembufferCopyExp) { + if (nullptr == pfnAppendMemBufferCopyExp) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_command_buffer_append_membuffer_copy_exp_params_t params = { + ur_command_buffer_append_mem_buffer_copy_exp_params_t params = { &hCommandBuffer, &hSrcMem, &hDstMem, @@ -5306,23 +5323,23 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( &pSyncPointWaitList, &pSyncPoint}; uint64_t instance = context.notify_begin( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP, - "urCommandBufferAppendMembufferCopyExp", &params); + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP, + "urCommandBufferAppendMemBufferCopyExp", &params); - ur_result_t result = pfnAppendMembufferCopyExp( + ur_result_t result = pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); - context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP, - "urCommandBufferAppendMembufferCopyExp", &params, + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP, + "urCommandBufferAppendMemBufferCopyExp", &params, &result, instance); return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( 
ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. @@ -5337,14 +5354,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferWriteExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferWriteExp; + auto pfnAppendMemBufferWriteExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; - if (nullptr == pfnAppendMembufferWriteExp) { + if (nullptr == pfnAppendMemBufferWriteExp) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_command_buffer_append_membuffer_write_exp_params_t params = { + ur_command_buffer_append_mem_buffer_write_exp_params_t params = { &hCommandBuffer, &hBuffer, &offset, @@ -5354,23 +5371,23 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( &pSyncPointWaitList, &pSyncPoint}; uint64_t instance = context.notify_begin( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP, - "urCommandBufferAppendMembufferWriteExp", &params); + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP, + "urCommandBufferAppendMemBufferWriteExp", &params); - ur_result_t result = pfnAppendMembufferWriteExp( + ur_result_t result = pfnAppendMemBufferWriteExp( hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); - context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP, - "urCommandBufferAppendMembufferWriteExp", &params, + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP, + "urCommandBufferAppendMemBufferWriteExp", &params, &result, instance); return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( 
+/// @brief Intercept function for urCommandBufferAppendMemBufferReadExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. @@ -5384,14 +5401,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferReadExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferReadExp; + auto pfnAppendMemBufferReadExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; - if (nullptr == pfnAppendMembufferReadExp) { + if (nullptr == pfnAppendMemBufferReadExp) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_command_buffer_append_membuffer_read_exp_params_t params = { + ur_command_buffer_append_mem_buffer_read_exp_params_t params = { &hCommandBuffer, &hBuffer, &offset, @@ -5401,23 +5418,23 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( &pSyncPointWaitList, &pSyncPoint}; uint64_t instance = context.notify_begin( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP, - "urCommandBufferAppendMembufferReadExp", &params); + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP, + "urCommandBufferAppendMemBufferReadExp", &params); - ur_result_t result = pfnAppendMembufferReadExp( + ur_result_t result = pfnAppendMemBufferReadExp( hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); - context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP, - "urCommandBufferAppendMembufferReadExp", &params, + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP, + "urCommandBufferAppendMemBufferReadExp", &params, &result, instance); return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief 
Intercept function for urCommandBufferAppendMembufferCopyRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. @@ -5439,14 +5456,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferCopyRectExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferCopyRectExp; + auto pfnAppendMemBufferCopyRectExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; - if (nullptr == pfnAppendMembufferCopyRectExp) { + if (nullptr == pfnAppendMemBufferCopyRectExp) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_command_buffer_append_membuffer_copy_rect_exp_params_t params = { + ur_command_buffer_append_mem_buffer_copy_rect_exp_params_t params = { &hCommandBuffer, &hSrcMem, &hDstMem, @@ -5461,25 +5478,25 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( &pSyncPointWaitList, &pSyncPoint}; uint64_t instance = context.notify_begin( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP, - "urCommandBufferAppendMembufferCopyRectExp", &params); + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP, + "urCommandBufferAppendMemBufferCopyRectExp", &params); - ur_result_t result = pfnAppendMembufferCopyRectExp( + ur_result_t result = pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); context.notify_end( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP, - 
"urCommandBufferAppendMembufferCopyRectExp", &params, &result, + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP, + "urCommandBufferAppendMemBufferCopyRectExp", &params, &result, instance); return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. @@ -5507,14 +5524,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferWriteRectExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferWriteRectExp; + auto pfnAppendMemBufferWriteRectExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; - if (nullptr == pfnAppendMembufferWriteRectExp) { + if (nullptr == pfnAppendMemBufferWriteRectExp) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_command_buffer_append_membuffer_write_rect_exp_params_t params = { + ur_command_buffer_append_mem_buffer_write_rect_exp_params_t params = { &hCommandBuffer, &hBuffer, &bufferOffset, @@ -5529,25 +5546,25 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( &pSyncPointWaitList, &pSyncPoint}; uint64_t instance = context.notify_begin( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP, - "urCommandBufferAppendMembufferWriteRectExp", &params); + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP, + "urCommandBufferAppendMemBufferWriteRectExp", &params); - ur_result_t result = 
pfnAppendMembufferWriteRectExp( + ur_result_t result = pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); context.notify_end( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP, - "urCommandBufferAppendMembufferWriteRectExp", &params, &result, + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP, + "urCommandBufferAppendMemBufferWriteRectExp", &params, &result, instance); return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -5573,14 +5590,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferReadRectExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferReadRectExp; + auto pfnAppendMemBufferReadRectExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; - if (nullptr == pfnAppendMembufferReadRectExp) { + if (nullptr == pfnAppendMemBufferReadRectExp) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_command_buffer_append_membuffer_read_rect_exp_params_t params = { + ur_command_buffer_append_mem_buffer_read_rect_exp_params_t params = { &hCommandBuffer, &hBuffer, &bufferOffset, @@ -5595,22 +5612,162 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( &pSyncPointWaitList, &pSyncPoint}; uint64_t instance = context.notify_begin( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP, - "urCommandBufferAppendMembufferReadRectExp", &params); + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP, + "urCommandBufferAppendMemBufferReadRectExp", &params); - ur_result_t result = pfnAppendMembufferReadRectExp( + ur_result_t result = pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); context.notify_end( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP, - "urCommandBufferAppendMembufferReadRectExp", &params, &result, + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP, + "urCommandBufferAppendMemBufferReadRectExp", &params, &result, instance); return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + 
ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t offset, ///< [in] offset into the buffer. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + auto pfnAppendMemBufferFillExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferFillExp; + + if (nullptr == pfnAppendMemBufferFillExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_append_mem_buffer_fill_exp_params_t params = { + &hCommandBuffer, + &hBuffer, + &pPattern, + &patternSize, + &offset, + &size, + &numSyncPointsInWaitList, + &pSyncPointWaitList, + &pSyncPoint}; + uint64_t instance = context.notify_begin( + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP, + "urCommandBufferAppendMemBufferFillExp", &params); + + ur_result_t result = pfnAppendMemBufferFillExp( + hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP, + "urCommandBufferAppendMemBufferFillExp", &params, + &result, instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMPrefetchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the 
command-buffer object. + const void *pMemory, ///< [in] pointer to USM allocated memory to prefetch. + size_t size, ///< [in] size in bytes to be fetched. + ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + auto pfnAppendUSMPrefetchExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMPrefetchExp; + + if (nullptr == pfnAppendUSMPrefetchExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_append_usm_prefetch_exp_params_t params = { + &hCommandBuffer, + &pMemory, + &size, + &flags, + &numSyncPointsInWaitList, + &pSyncPointWaitList, + &pSyncPoint}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP, + "urCommandBufferAppendUSMPrefetchExp", &params); + + ur_result_t result = pfnAppendUSMPrefetchExp( + hCommandBuffer, pMemory, size, flags, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP, + "urCommandBufferAppendUSMPrefetchExp", &params, &result, + instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMAdviseExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to the USM memory object. + size_t size, ///< [in] size in bytes to be advised. 
+ ur_usm_advice_flags_t advice, ///< [in] USM memory advice + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + auto pfnAppendUSMAdviseExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMAdviseExp; + + if (nullptr == pfnAppendUSMAdviseExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_append_usm_advise_exp_params_t params = { + &hCommandBuffer, + &pMemory, + &size, + &advice, + &numSyncPointsInWaitList, + &pSyncPointWaitList, + &pSyncPoint}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP, + "urCommandBufferAppendUSMAdviseExp", &params); + + ur_result_t result = pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, + advice, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP, + "urCommandBufferAppendUSMAdviseExp", &params, &result, + instance); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( @@ -5650,6 +5807,99 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t + workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global 
and + ///< work-group work-items + const size_t * + pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t * + pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t * + pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the number of local work-items forming a work-group that will + ///< execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. 
+) { + auto pfnCooperativeKernelLaunchExp = + context.urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp; + + if (nullptr == pfnCooperativeKernelLaunchExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_enqueue_cooperative_kernel_launch_exp_params_t params = { + &hQueue, + &hKernel, + &workDim, + &pGlobalWorkOffset, + &pGlobalWorkSize, + &pLocalWorkSize, + &numEventsInWaitList, + &phEventWaitList, + &phEvent}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP, + "urEnqueueCooperativeKernelLaunchExp", &params); + + ur_result_t result = pfnCooperativeKernelLaunchExp( + hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); + + context.notify_end(UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP, + "urEnqueueCooperativeKernelLaunchExp", &params, &result, + instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp +__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups +) { + auto pfnSuggestMaxCooperativeGroupCountExp = + context.urDdiTable.KernelExp.pfnSuggestMaxCooperativeGroupCountExp; + + if (nullptr == pfnSuggestMaxCooperativeGroupCountExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_kernel_suggest_max_cooperative_group_count_exp_params_t params = { + &hKernel, &pGroupCountRet}; + uint64_t instance = context.notify_begin( + UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP, + "urKernelSuggestMaxCooperativeGroupCountExp", &params); + + ur_result_t result = + pfnSuggestMaxCooperativeGroupCountExp(hKernel, pGroupCountRet); + + context.notify_end( + UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP, + 
"urKernelSuggestMaxCooperativeGroupCountExp", &params, &result, + instance); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramBuildExp __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( @@ -5924,12 +6174,6 @@ __urdlllocal ur_result_t UR_APICALL urGetGlobalProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - dditable.pfnInit = pDdiTable->pfnInit; - pDdiTable->pfnInit = ur_tracing_layer::urInit; - - dditable.pfnTearDown = pDdiTable->pfnTearDown; - pDdiTable->pfnTearDown = ur_tracing_layer::urTearDown; - dditable.pfnAdapterGet = pDdiTable->pfnAdapterGet; pDdiTable->pfnAdapterGet = ur_tracing_layer::urAdapterGet; @@ -6093,36 +6337,52 @@ __urdlllocal ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendKernelLaunchExp = ur_tracing_layer::urCommandBufferAppendKernelLaunchExp; - dditable.pfnAppendMemcpyUSMExp = pDdiTable->pfnAppendMemcpyUSMExp; - pDdiTable->pfnAppendMemcpyUSMExp = - ur_tracing_layer::urCommandBufferAppendMemcpyUSMExp; + dditable.pfnAppendUSMMemcpyExp = pDdiTable->pfnAppendUSMMemcpyExp; + pDdiTable->pfnAppendUSMMemcpyExp = + ur_tracing_layer::urCommandBufferAppendUSMMemcpyExp; + + dditable.pfnAppendUSMFillExp = pDdiTable->pfnAppendUSMFillExp; + pDdiTable->pfnAppendUSMFillExp = + ur_tracing_layer::urCommandBufferAppendUSMFillExp; + + dditable.pfnAppendMemBufferCopyExp = pDdiTable->pfnAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferCopyExp = + ur_tracing_layer::urCommandBufferAppendMemBufferCopyExp; - dditable.pfnAppendMembufferCopyExp = pDdiTable->pfnAppendMembufferCopyExp; - pDdiTable->pfnAppendMembufferCopyExp = - ur_tracing_layer::urCommandBufferAppendMembufferCopyExp; + dditable.pfnAppendMemBufferWriteExp = pDdiTable->pfnAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferWriteExp = + ur_tracing_layer::urCommandBufferAppendMemBufferWriteExp; - dditable.pfnAppendMembufferWriteExp = 
pDdiTable->pfnAppendMembufferWriteExp; - pDdiTable->pfnAppendMembufferWriteExp = - ur_tracing_layer::urCommandBufferAppendMembufferWriteExp; + dditable.pfnAppendMemBufferReadExp = pDdiTable->pfnAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferReadExp = + ur_tracing_layer::urCommandBufferAppendMemBufferReadExp; - dditable.pfnAppendMembufferReadExp = pDdiTable->pfnAppendMembufferReadExp; - pDdiTable->pfnAppendMembufferReadExp = - ur_tracing_layer::urCommandBufferAppendMembufferReadExp; + dditable.pfnAppendMemBufferCopyRectExp = + pDdiTable->pfnAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + ur_tracing_layer::urCommandBufferAppendMemBufferCopyRectExp; - dditable.pfnAppendMembufferCopyRectExp = - pDdiTable->pfnAppendMembufferCopyRectExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - ur_tracing_layer::urCommandBufferAppendMembufferCopyRectExp; + dditable.pfnAppendMemBufferWriteRectExp = + pDdiTable->pfnAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + ur_tracing_layer::urCommandBufferAppendMemBufferWriteRectExp; - dditable.pfnAppendMembufferWriteRectExp = - pDdiTable->pfnAppendMembufferWriteRectExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - ur_tracing_layer::urCommandBufferAppendMembufferWriteRectExp; + dditable.pfnAppendMemBufferReadRectExp = + pDdiTable->pfnAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + ur_tracing_layer::urCommandBufferAppendMemBufferReadRectExp; - dditable.pfnAppendMembufferReadRectExp = - pDdiTable->pfnAppendMembufferReadRectExp; - pDdiTable->pfnAppendMembufferReadRectExp = - ur_tracing_layer::urCommandBufferAppendMembufferReadRectExp; + dditable.pfnAppendMemBufferFillExp = pDdiTable->pfnAppendMemBufferFillExp; + pDdiTable->pfnAppendMemBufferFillExp = + ur_tracing_layer::urCommandBufferAppendMemBufferFillExp; + + dditable.pfnAppendUSMPrefetchExp = pDdiTable->pfnAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMPrefetchExp = + 
ur_tracing_layer::urCommandBufferAppendUSMPrefetchExp; + + dditable.pfnAppendUSMAdviseExp = pDdiTable->pfnAppendUSMAdviseExp; + pDdiTable->pfnAppendUSMAdviseExp = + ur_tracing_layer::urCommandBufferAppendUSMAdviseExp; dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_tracing_layer::urCommandBufferEnqueueExp; @@ -6296,6 +6556,41 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueProcAddrTable( return result; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EnqueueExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +__urdlllocal ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_enqueue_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_tracing_layer::context.urDdiTable.EnqueueExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_tracing_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_tracing_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnCooperativeKernelLaunchExp = + pDdiTable->pfnCooperativeKernelLaunchExp; + pDdiTable->pfnCooperativeKernelLaunchExp = + ur_tracing_layer::urEnqueueCooperativeKernelLaunchExp; + + return result; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Event table /// with current process' addresses /// @@ -6429,6 +6724,41 @@ __urdlllocal ur_result_t UR_APICALL urGetKernelProcAddrTable( return result; } 
/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's KernelExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +__urdlllocal ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_kernel_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_tracing_layer::context.urDdiTable.KernelExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_tracing_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_tracing_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnSuggestMaxCooperativeGroupCountExp = + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp; + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = + ur_tracing_layer::urKernelSuggestMaxCooperativeGroupCountExp; + + return result; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Mem table /// with current process' addresses /// @@ -7049,13 +7379,16 @@ __urdlllocal ur_result_t UR_APICALL urGetDeviceProcAddrTable( } ur_result_t context_t::init(ur_dditable_t *dditable, - const std::set &enabledLayerNames) { + const std::set &enabledLayerNames, + codeloc_data codelocData) { ur_result_t result = UR_RESULT_SUCCESS; if (!enabledLayerNames.count(name)) { return result; } + ur_tracing_layer::context.codelocData = codelocData; + if (UR_RESULT_SUCCESS == result) { result = ur_tracing_layer::urGetGlobalProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Global); @@ -7081,6 +7414,11 @@ ur_result_t 
context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Enqueue); } + if (UR_RESULT_SUCCESS == result) { + result = ur_tracing_layer::urGetEnqueueExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->EnqueueExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_tracing_layer::urGetEventProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Event); @@ -7091,6 +7429,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Kernel); } + if (UR_RESULT_SUCCESS == result) { + result = ur_tracing_layer::urGetKernelExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->KernelExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_tracing_layer::urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &dditable->Mem); diff --git a/source/loader/layers/ur_proxy_layer.hpp b/source/loader/layers/ur_proxy_layer.hpp index 782a7e241b..2b710f3287 100644 --- a/source/loader/layers/ur_proxy_layer.hpp +++ b/source/loader/layers/ur_proxy_layer.hpp @@ -12,6 +12,7 @@ #ifndef UR_PROXY_LAYER_H #define UR_PROXY_LAYER_H 1 +#include "ur_codeloc.hpp" #include "ur_ddi.h" #include "ur_util.hpp" @@ -24,9 +25,10 @@ class __urdlllocal proxy_layer_context_t { virtual std::vector getNames() const = 0; virtual bool isAvailable() const = 0; - virtual ur_result_t - init(ur_dditable_t *dditable, - const std::set &enabledLayerNames) = 0; + virtual ur_result_t init(ur_dditable_t *dditable, + const std::set &enabledLayerNames, + codeloc_data codelocData) = 0; + virtual ur_result_t tearDown() = 0; }; #endif /* UR_PROXY_LAYER_H */ diff --git a/source/loader/layers/validation/ur_leak_check.hpp b/source/loader/layers/validation/ur_leak_check.hpp index 475742fc75..3f3fb80b0b 100644 --- a/source/loader/layers/validation/ur_leak_check.hpp +++ b/source/loader/layers/validation/ur_leak_check.hpp @@ -24,6 +24,7 @@ struct RefCountContext { }; enum RefCountUpdateType { + REFCOUNT_CREATE_OR_INCREASE, REFCOUNT_CREATE, REFCOUNT_INCREASE, REFCOUNT_DECREASE, @@ -31,13 +32,25 
@@ struct RefCountContext { std::mutex mutex; std::unordered_map counts; + int64_t adapterCount = 0; - void updateRefCount(void *ptr, enum RefCountUpdateType type) { + void updateRefCount(void *ptr, enum RefCountUpdateType type, + bool isAdapterHandle = false) { std::unique_lock ulock(mutex); auto it = counts.find(ptr); switch (type) { + case REFCOUNT_CREATE_OR_INCREASE: + if (it == counts.end()) { + counts[ptr] = {1, getCurrentBacktrace()}; + if (isAdapterHandle) { + adapterCount++; + } + } else { + counts[ptr].refCount++; + } + break; case REFCOUNT_CREATE: if (it == counts.end()) { counts[ptr] = {1, getCurrentBacktrace()}; @@ -65,6 +78,8 @@ struct RefCountContext { if (counts[ptr].refCount < 0) { context.logger.error( "Attempting to release nonexistent handle {}", ptr); + } else if (counts[ptr].refCount == 0 && isAdapterHandle) { + adapterCount--; } break; } @@ -75,17 +90,27 @@ struct RefCountContext { if (counts[ptr].refCount == 0) { counts.erase(ptr); } + + // No more active adapters, so any references still held are leaked + if (adapterCount == 0) { + logInvalidReferences(); + clear(); + } } public: void createRefCount(void *ptr) { updateRefCount(ptr, REFCOUNT_CREATE); } - void incrementRefCount(void *ptr) { - updateRefCount(ptr, REFCOUNT_INCREASE); + void incrementRefCount(void *ptr, bool isAdapterHandle = false) { + updateRefCount(ptr, REFCOUNT_INCREASE, isAdapterHandle); + } + + void decrementRefCount(void *ptr, bool isAdapterHandle = false) { + updateRefCount(ptr, REFCOUNT_DECREASE, isAdapterHandle); } - void decrementRefCount(void *ptr) { - updateRefCount(ptr, REFCOUNT_DECREASE); + void createOrIncrementRefCount(void *ptr, bool isAdapterHandle = false) { + updateRefCount(ptr, REFCOUNT_CREATE_OR_INCREASE, isAdapterHandle); } void clear() { counts.clear(); } diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index ab1708c0b6..72e225028c 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ 
b/source/loader/layers/validation/ur_valddi.cpp @@ -14,58 +14,6 @@ namespace ur_validation_layer { -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urInit -__urdlllocal ur_result_t UR_APICALL urInit( - ur_device_init_flags_t device_flags, ///< [in] device initialization flags. - ///< must be 0 (default) or a combination of ::ur_device_init_flag_t. - ur_loader_config_handle_t - hLoaderConfig ///< [in][optional] Handle of loader config handle. -) { - auto pfnInit = context.urDdiTable.Global.pfnInit; - - if (nullptr == pfnInit) { - return UR_RESULT_ERROR_UNINITIALIZED; - } - - if (context.enableParameterValidation) { - if (UR_DEVICE_INIT_FLAGS_MASK & device_flags) { - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } - - ur_result_t result = pfnInit(device_flags, hLoaderConfig); - - return result; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTearDown -__urdlllocal ur_result_t UR_APICALL urTearDown( - void *pParams ///< [in] pointer to tear down parameters -) { - auto pfnTearDown = context.urDdiTable.Global.pfnTearDown; - - if (nullptr == pfnTearDown) { - return UR_RESULT_ERROR_UNINITIALIZED; - } - - if (context.enableParameterValidation) { - if (NULL == pParams) { - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - } - } - - ur_result_t result = pfnTearDown(pParams); - - if (context.enableLeakChecking) { - refCountContext.logInvalidReferences(); - refCountContext.clear(); - } - - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urAdapterGet __urdlllocal ur_result_t UR_APICALL urAdapterGet( @@ -92,6 +40,11 @@ __urdlllocal ur_result_t UR_APICALL urAdapterGet( ur_result_t result = pfnAdapterGet(NumEntries, phAdapters, pNumAdapters); + if (context.enableLeakChecking && phAdapters && + result == UR_RESULT_SUCCESS) { + 
refCountContext.createOrIncrementRefCount(*phAdapters, true); + } + return result; } @@ -115,7 +68,7 @@ __urdlllocal ur_result_t UR_APICALL urAdapterRelease( ur_result_t result = pfnAdapterRelease(hAdapter); if (context.enableLeakChecking && result == UR_RESULT_SUCCESS) { - refCountContext.decrementRefCount(hAdapter); + refCountContext.decrementRefCount(hAdapter, true); } return result; @@ -141,7 +94,7 @@ __urdlllocal ur_result_t UR_APICALL urAdapterRetain( ur_result_t result = pfnAdapterRetain(hAdapter); if (context.enableLeakChecking && result == UR_RESULT_SUCCESS) { - refCountContext.incrementRefCount(hAdapter); + refCountContext.incrementRefCount(hAdapter, true); } return result; @@ -261,6 +214,10 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGet( if (NULL == phAdapters) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + + if (NumEntries == 0 && phPlatforms != NULL) { + return UR_RESULT_ERROR_INVALID_SIZE; + } } ur_result_t result = @@ -445,8 +402,8 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGet( ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices. - ///< If phDevices in not NULL then NumEntries should be greater than zero, - ///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE, + ///< If phDevices is not NULL, then NumEntries should be greater than zero. + ///< Otherwise ::UR_RESULT_ERROR_INVALID_SIZE ///< will be returned. ur_device_handle_t * phDevices, ///< [out][optional][range(0, NumEntries)] array of handle of devices. 
@@ -466,9 +423,17 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGet( return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } + if (NumEntries > 0 && phDevices == NULL) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + if (UR_DEVICE_TYPE_VPU < DeviceType) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } + + if (NumEntries == 0 && phDevices != NULL) { + return UR_RESULT_ERROR_INVALID_SIZE; + } } ur_result_t result = @@ -609,6 +574,10 @@ __urdlllocal ur_result_t UR_APICALL urDevicePartition( if (NULL == pProperties) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + + if (NULL == pProperties->pProperties) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } } ur_result_t result = pfnPartition(hDevice, pProperties, NumDevices, @@ -782,6 +751,10 @@ __urdlllocal ur_result_t UR_APICALL urContextCreate( if (NULL == phContext) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + + if (NULL != pProperties && UR_CONTEXT_FLAGS_MASK & pProperties->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } } ur_result_t result = @@ -1659,6 +1632,10 @@ __urdlllocal ur_result_t UR_APICALL urUSMHostAlloc( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + if (NULL != pUSMDesc && UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + if (pUSMDesc && pUSMDesc->align != 0 && ((pUSMDesc->align & (pUSMDesc->align - 1)) != 0)) { return UR_RESULT_ERROR_INVALID_VALUE; @@ -1706,6 +1683,10 @@ __urdlllocal ur_result_t UR_APICALL urUSMDeviceAlloc( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + if (NULL != pUSMDesc && UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + if (pUSMDesc && pUSMDesc->align != 0 && ((pUSMDesc->align & (pUSMDesc->align - 1)) != 0)) { return UR_RESULT_ERROR_INVALID_VALUE; @@ -1754,6 +1735,10 @@ __urdlllocal ur_result_t UR_APICALL urUSMSharedAlloc( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + if (NULL != pUSMDesc && UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints) { + return 
UR_RESULT_ERROR_INVALID_ENUMERATION; + } + if (pUSMDesc && pUSMDesc->align != 0 && ((pUSMDesc->align & (pUSMDesc->align - 1)) != 0)) { return UR_RESULT_ERROR_INVALID_VALUE; @@ -2279,6 +2264,11 @@ __urdlllocal ur_result_t UR_APICALL urPhysicalMemCreate( if (NULL == phPhysicalMem) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + + if (NULL != pProperties && + UR_PHYSICAL_MEM_FLAGS_MASK & pProperties->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } } ur_result_t result = @@ -3251,6 +3241,11 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgMemObj( if (NULL == hKernel) { return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } + + if (NULL != pProperties && + UR_MEM_FLAGS_MASK & pProperties->memoryAccess) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } } ur_result_t result = @@ -3441,6 +3436,22 @@ __urdlllocal ur_result_t UR_APICALL urQueueCreate( if (NULL == phQueue) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + + if (NULL != pProperties && UR_QUEUE_FLAGS_MASK & pProperties->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (pProperties != NULL && + pProperties->flags & UR_QUEUE_FLAG_PRIORITY_HIGH && + pProperties->flags & UR_QUEUE_FLAG_PRIORITY_LOW) { + return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES; + } + + if (pProperties != NULL && + pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_BATCHED && + pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE) { + return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES; + } } ur_result_t result = pfnCreate(hContext, hDevice, pProperties, phQueue); @@ -3877,9 +3888,13 @@ __urdlllocal ur_result_t UR_APICALL urEventSetCallback( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED < execStatus) { + if (UR_EXECUTION_INFO_QUEUED < execStatus) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } + + if (execStatus == UR_EXECUTION_INFO_QUEUED) { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } } ur_result_t result = @@ -3949,6 +3964,14 @@ __urdlllocal ur_result_t 
UR_APICALL urEnqueueKernelLaunch( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnKernelLaunch( @@ -3991,6 +4014,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWait( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4033,6 +4064,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnEventsWaitWithBarrier(hQueue, numEventsInWaitList, @@ -4045,7 +4084,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// @brief Intercept function for urEnqueueMemBufferRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -4086,6 +4126,19 @@ __urdlllocal ur_result_t UR_APICALL 
urEnqueueMemBufferRead( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (auto boundsError = bounds(hBuffer, offset, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4099,7 +4152,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( /// @brief Intercept function for urEnqueueMemBufferWrite __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -4142,6 +4196,19 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (auto boundsError = bounds(hBuffer, offset, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4155,7 +4222,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( /// @brief Intercept function for urEnqueueMemBufferReadRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, 
region)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -4248,6 +4316,19 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( 0) { return UR_RESULT_ERROR_INVALID_SIZE; } + + if (auto boundsError = bounds(hBuffer, bufferOrigin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnMemBufferReadRect( @@ -4262,7 +4343,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// @brief Intercept function for urEnqueueMemBufferWriteRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer @@ -4359,6 +4441,19 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( 0) { return UR_RESULT_ERROR_INVALID_SIZE; } + + if (auto boundsError = bounds(hBuffer, bufferOrigin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnMemBufferWriteRect( @@ -4372,9 +4467,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( 
/////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the src buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object size_t srcOffset, ///< [in] offset into hBufferSrc to begin copying from size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into size_t size, ///< [in] size in bytes of data being copied @@ -4414,6 +4511,24 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (auto boundsError = bounds(hBufferSrc, srcOffset, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (auto boundsError = bounds(hBufferDst, dstOffset, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4426,9 +4541,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopyRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the source buffer object - 
ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOrigin, region)] handle of the source buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the dest buffer object ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer ur_rect_region_t @@ -4513,6 +4630,24 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( 0) { return UR_RESULT_ERROR_INVALID_SIZE; } + + if (auto boundsError = bounds(hBufferSrc, srcOrigin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (auto boundsError = bounds(hBufferDst, dstOrigin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnMemBufferCopyRect( @@ -4527,10 +4662,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// @brief Intercept function for urEnqueueMemBufferFill __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object - const void *pPattern, ///< [in] pointer to the fill pattern - size_t patternSize, ///< [in] size in bytes of the pattern - size_t offset, ///< [in] offset into the buffer + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + const void *pPattern, ///< [in] pointer to the fill pattern + size_t patternSize, ///< [in] size in bytes of the pattern + size_t offset, ///< [in] offset into the buffer size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize 
uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -4568,6 +4704,39 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (patternSize == 0 || size == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (patternSize > size) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if ((patternSize & (patternSize - 1)) != 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (size % patternSize != 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (offset % patternSize != 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (auto boundsError = bounds(hBuffer, offset, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4581,7 +4750,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( /// @brief Intercept function for urEnqueueMemImageRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image @@ -4627,6 +4797,23 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (region.width == 0 || region.height == 0 || region.depth == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (auto boundsError = boundsImage(hImage, 
origin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnMemImageRead( @@ -4640,7 +4827,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( /// @brief Intercept function for urEnqueueMemImageWrite __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t @@ -4687,6 +4875,23 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (region.width == 0 || region.height == 0 || region.depth == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (auto boundsError = boundsImage(hImage, origin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnMemImageWrite( @@ -4699,9 +4904,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemImageCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImageSrc, ///< [in] handle of the src image object - ur_mem_handle_t hImageDst, ///< [in] handle of 
the dest image object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hImageSrc, ///< [in][bounds(srcOrigin, region)] handle of the src image object + ur_mem_handle_t + hImageDst, ///< [in][bounds(dstOrigin, region)] handle of the dest image object ur_rect_offset_t srcOrigin, ///< [in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D ///< image @@ -4747,6 +4954,28 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (region.width == 0 || region.height == 0 || region.depth == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (auto boundsError = boundsImage(hImageSrc, srcOrigin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (auto boundsError = boundsImage(hImageDst, dstOrigin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4760,7 +4989,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( /// @brief Intercept function for urEnqueueMemBufferMap __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t mapFlags, ///< [in] flags for read, write, readwrite mapping size_t offset, ///< [in] offset in bytes of the buffer region being mapped @@ -4807,6 +5037,19 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return 
UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (auto boundsError = bounds(hBuffer, offset, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnMemBufferMap(hQueue, hBuffer, blockingMap, mapFlags, @@ -4859,6 +5102,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4872,7 +5123,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap( /// @brief Intercept function for urEnqueueUSMFill __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - void *ptr, ///< [in] pointer to USM memory object + void *pMem, ///< [in][bounds(0, size)] pointer to USM memory object size_t patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. 
@@ -4901,7 +5152,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } - if (NULL == ptr) { + if (NULL == pMem) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } @@ -4932,10 +5183,23 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (auto boundsError = bounds(hQueue, pMem, 0, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = - pfnUSMFill(hQueue, ptr, patternSize, pPattern, size, + pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, phEvent); return result; @@ -4946,9 +5210,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object bool blocking, ///< [in] blocking or non-blocking copy - void *pDst, ///< [in] pointer to the destination USM memory object - const void *pSrc, ///< [in] pointer to the source USM memory object - size_t size, ///< [in] size in bytes to be copied + void * + pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void * + pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object + size_t size, ///< [in] size in bytes to be copied uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -4989,6 +5255,24 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (auto 
boundsError = bounds(hQueue, pDst, 0, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (auto boundsError = bounds(hQueue, pSrc, 0, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -5001,9 +5285,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMPrefetch __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be fetched + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -5045,6 +5330,19 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (auto boundsError = bounds(hQueue, pMem, 0, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -5057,9 +5355,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( /////////////////////////////////////////////////////////////////////////////// /// @brief 
Intercept function for urEnqueueUSMAdvise __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be advised + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t advice, ///< [in] USM memory advice ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular @@ -5087,6 +5386,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( if (size == 0) { return UR_RESULT_ERROR_INVALID_SIZE; } + + if (auto boundsError = bounds(hQueue, pMem, 0, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } } ur_result_t result = pfnUSMAdvise(hQueue, pMem, size, advice, phEvent); @@ -5098,7 +5402,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( /// @brief Intercept function for urEnqueueUSMFill2D __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. - void *pMem, ///< [in] pointer to memory to be filled. + void * + pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. size_t pitch, ///< [in] the total width of the destination memory including padding. 
size_t @@ -5178,6 +5483,19 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (auto boundsError = bounds(hQueue, pMem, 0, pitch * height); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -5192,10 +5510,13 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. bool blocking, ///< [in] indicates if this operation should block the host. - void *pDst, ///< [in] pointer to memory where data will be copied. + void * + pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. size_t dstPitch, ///< [in] the total width of the source memory including padding. - const void *pSrc, ///< [in] pointer to memory to be copied. + const void * + pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. size_t srcPitch, ///< [in] the total width of the source memory including padding. size_t width, ///< [in] the width in bytes of each row to be copied. 
@@ -5256,6 +5577,24 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (auto boundsError = bounds(hQueue, pDst, 0, dstPitch * height); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (auto boundsError = bounds(hQueue, pSrc, 0, srcPitch * height); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -5319,6 +5658,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnDeviceGlobalVariableWrite( @@ -5382,6 +5729,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnDeviceGlobalVariableRead( @@ -5448,6 +5803,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueReadHostPipe( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return 
UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -5481,7 +5844,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( ///< events that must be complete before the host pipe write. ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. ur_event_handle_t * - phEvent ///< [out] returns an event object that identifies this write command + phEvent ///< [out][optional] returns an event object that identifies this write command ///< and can be used to query or queue a wait for this command to complete. ) { auto pfnWriteHostPipe = context.urDdiTable.Enqueue.pfnWriteHostPipe; @@ -5507,10 +5870,6 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (NULL == phEvent) { - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - } - if (phEventWaitList == NULL && numEventsInWaitList > 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } @@ -5518,6 +5877,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -5567,6 +5934,10 @@ __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + if (NULL != pUSMDesc && UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + if (pUSMDesc && pUSMDesc->align != 0 && ((pUSMDesc->align & (pUSMDesc->align - 1)) != 0)) { return UR_RESULT_ERROR_INVALID_VALUE; @@ -5935,6 +6306,14 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( if (pImageDesc && UR_MEM_TYPE_IMAGE1D_BUFFER < pImageDesc->type) { return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; } + + if 
(phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnImageCopyExp( @@ -6301,6 +6680,14 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( if (NULL == hSemaphore) { return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnWaitExternalSemaphoreExp( @@ -6341,6 +6728,14 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( if (NULL == hSemaphore) { return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnSignalExternalSemaphoreExp( @@ -6528,8 +6923,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMemcpyUSMExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +/// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. 
@@ -6542,10 +6937,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMemcpyUSMExp = - context.urDdiTable.CommandBufferExp.pfnAppendMemcpyUSMExp; + auto pfnAppendUSMMemcpyExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; - if (nullptr == pfnAppendMemcpyUSMExp) { + if (nullptr == pfnAppendUSMMemcpyExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6575,7 +6970,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( } } - ur_result_t result = pfnAppendMemcpyUSMExp(hCommandBuffer, pDst, pSrc, size, + ur_result_t result = pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6583,26 +6978,26 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferCopyExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +/// @brief Intercept function for urCommandBufferAppendUSMFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. - ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. - size_t srcOffset, ///< [in] Offset into the source memory. - size_t dstOffset, ///< [in] Offset into the destination memory - size_t size, ///< [in] The number of bytes to be copied. + hCommandBuffer, ///< [in] handle of the command-buffer object. + void *pMemory, ///< [in] pointer to USM allocated memory to fill. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. 
+ size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. ) { - auto pfnAppendMembufferCopyExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferCopyExp; + auto pfnAppendUSMFillExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMFillExp; - if (nullptr == pfnAppendMembufferCopyExp) { + if (nullptr == pfnAppendUSMFillExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6611,13 +7006,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } - if (NULL == hSrcMem) { - return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + if (NULL == pMemory) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (NULL == hDstMem) { - return UR_RESULT_ERROR_INVALID_NULL_HANDLE; - } + if (NULL == pPattern) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (patternSize == 0 || size == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (patternSize > size) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if ((patternSize & (patternSize - 1)) != 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (size % patternSize != 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; @@ -6628,7 +7039,60 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( } } - ur_result_t result = pfnAppendMembufferCopyExp( + ur_result_t result = pfnAppendUSMFillExp( + hCommandBuffer, pMemory, 
pPattern, patternSize, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. + ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. + size_t srcOffset, ///< [in] Offset into the source memory. + size_t dstOffset, ///< [in] Offset into the destination memory + size_t size, ///< [in] The number of bytes to be copied. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
+ ur_exp_command_buffer_sync_point_t + *pSyncPoint ///< [out][optional] sync point associated with this command +) { + auto pfnAppendMemBufferCopyExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; + + if (nullptr == pfnAppendMemBufferCopyExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommandBuffer) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == hSrcMem) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == hDstMem) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + + if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + } + + ur_result_t result = pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6636,8 +7100,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6652,10 +7116,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferWriteExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferWriteExp; + auto pfnAppendMemBufferWriteExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; - if (nullptr == pfnAppendMembufferWriteExp) { + if (nullptr == pfnAppendMemBufferWriteExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6681,7 +7145,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( } } - ur_result_t result = pfnAppendMembufferWriteExp( + ur_result_t result = pfnAppendMemBufferWriteExp( hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6689,8 +7153,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferReadExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6704,10 +7168,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferReadExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferReadExp; + auto pfnAppendMemBufferReadExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; - if (nullptr == pfnAppendMembufferReadExp) { + if (nullptr == pfnAppendMemBufferReadExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6733,7 +7197,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( } } - ur_result_t result = pfnAppendMembufferReadExp( + ur_result_t result = pfnAppendMemBufferReadExp( hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6741,8 +7205,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferCopyRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -6764,10 +7228,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferCopyRectExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferCopyRectExp; + auto pfnAppendMemBufferCopyRectExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; - if (nullptr == pfnAppendMembufferCopyRectExp) { + if (nullptr == pfnAppendMemBufferCopyRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6793,7 +7257,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( } } - ur_result_t result = pfnAppendMembufferCopyRectExp( + ur_result_t result = pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6802,8 +7266,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6831,10 +7295,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferWriteRectExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferWriteRectExp; + auto pfnAppendMemBufferWriteRectExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; - if (nullptr == pfnAppendMembufferWriteRectExp) { + if (nullptr == pfnAppendMemBufferWriteRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6860,7 +7324,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( } } - ur_result_t result = pfnAppendMembufferWriteRectExp( + ur_result_t result = pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6869,8 +7333,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6896,10 +7360,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferReadRectExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferReadRectExp; + auto pfnAppendMemBufferReadRectExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; - if (nullptr == pfnAppendMembufferReadRectExp) { + if (nullptr == pfnAppendMemBufferReadRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6925,7 +7389,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( } } - ur_result_t result = pfnAppendMembufferReadRectExp( + ur_result_t result = pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6933,6 +7397,170 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t offset, ///< [in] offset into the buffer. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
+ ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + auto pfnAppendMemBufferFillExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferFillExp; + + if (nullptr == pfnAppendMemBufferFillExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommandBuffer) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == hBuffer) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pPattern) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + + if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + } + + ur_result_t result = pfnAppendMemBufferFillExp( + hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMPrefetchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to USM allocated memory to prefetch. + size_t size, ///< [in] size in bytes to be fetched. + ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. 
+) { + auto pfnAppendUSMPrefetchExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMPrefetchExp; + + if (nullptr == pfnAppendUSMPrefetchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommandBuffer) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pMemory) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_USM_MIGRATION_FLAGS_MASK & flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + + if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + + if (size == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + } + + ur_result_t result = pfnAppendUSMPrefetchExp( + hCommandBuffer, pMemory, size, flags, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMAdviseExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to the USM memory object. + size_t size, ///< [in] size in bytes to be advised. + ur_usm_advice_flags_t advice, ///< [in] USM memory advice + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. 
+) { + auto pfnAppendUSMAdviseExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMAdviseExp; + + if (nullptr == pfnAppendUSMAdviseExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommandBuffer) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pMemory) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_USM_ADVICE_FLAGS_MASK & advice) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + + if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + + if (size == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + } + + ur_result_t result = pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, + advice, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( @@ -6972,6 +7600,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnEnqueueExp( @@ -6980,6 +7616,114 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL 
urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t + workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and + ///< work-group work-items + const size_t * + pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t * + pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t * + pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the number of local work-items forming a work-group that will + ///< execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. 
+) { + auto pfnCooperativeKernelLaunchExp = + context.urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp; + + if (nullptr == pfnCooperativeKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hQueue) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == hKernel) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pGlobalWorkOffset) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == pGlobalWorkSize) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + ur_result_t result = pfnCooperativeKernelLaunchExp( + hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp +__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups +) { + auto pfnSuggestMaxCooperativeGroupCountExp = + context.urDdiTable.KernelExp.pfnSuggestMaxCooperativeGroupCountExp; + + if (nullptr == pfnSuggestMaxCooperativeGroupCountExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hKernel) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pGroupCountRet) { + 
return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + } + + ur_result_t result = + pfnSuggestMaxCooperativeGroupCountExp(hKernel, pGroupCountRet); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramBuildExp __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( @@ -7296,12 +8040,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - dditable.pfnInit = pDdiTable->pfnInit; - pDdiTable->pfnInit = ur_validation_layer::urInit; - - dditable.pfnTearDown = pDdiTable->pfnTearDown; - pDdiTable->pfnTearDown = ur_validation_layer::urTearDown; - dditable.pfnAdapterGet = pDdiTable->pfnAdapterGet; pDdiTable->pfnAdapterGet = ur_validation_layer::urAdapterGet; @@ -7470,36 +8208,52 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendKernelLaunchExp = ur_validation_layer::urCommandBufferAppendKernelLaunchExp; - dditable.pfnAppendMemcpyUSMExp = pDdiTable->pfnAppendMemcpyUSMExp; - pDdiTable->pfnAppendMemcpyUSMExp = - ur_validation_layer::urCommandBufferAppendMemcpyUSMExp; + dditable.pfnAppendUSMMemcpyExp = pDdiTable->pfnAppendUSMMemcpyExp; + pDdiTable->pfnAppendUSMMemcpyExp = + ur_validation_layer::urCommandBufferAppendUSMMemcpyExp; + + dditable.pfnAppendUSMFillExp = pDdiTable->pfnAppendUSMFillExp; + pDdiTable->pfnAppendUSMFillExp = + ur_validation_layer::urCommandBufferAppendUSMFillExp; + + dditable.pfnAppendMemBufferCopyExp = pDdiTable->pfnAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferCopyExp = + ur_validation_layer::urCommandBufferAppendMemBufferCopyExp; + + dditable.pfnAppendMemBufferWriteExp = pDdiTable->pfnAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferWriteExp = + ur_validation_layer::urCommandBufferAppendMemBufferWriteExp; - dditable.pfnAppendMembufferCopyExp = pDdiTable->pfnAppendMembufferCopyExp; - pDdiTable->pfnAppendMembufferCopyExp = - 
ur_validation_layer::urCommandBufferAppendMembufferCopyExp; + dditable.pfnAppendMemBufferReadExp = pDdiTable->pfnAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferReadExp = + ur_validation_layer::urCommandBufferAppendMemBufferReadExp; - dditable.pfnAppendMembufferWriteExp = pDdiTable->pfnAppendMembufferWriteExp; - pDdiTable->pfnAppendMembufferWriteExp = - ur_validation_layer::urCommandBufferAppendMembufferWriteExp; + dditable.pfnAppendMemBufferCopyRectExp = + pDdiTable->pfnAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + ur_validation_layer::urCommandBufferAppendMemBufferCopyRectExp; - dditable.pfnAppendMembufferReadExp = pDdiTable->pfnAppendMembufferReadExp; - pDdiTable->pfnAppendMembufferReadExp = - ur_validation_layer::urCommandBufferAppendMembufferReadExp; + dditable.pfnAppendMemBufferWriteRectExp = + pDdiTable->pfnAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + ur_validation_layer::urCommandBufferAppendMemBufferWriteRectExp; - dditable.pfnAppendMembufferCopyRectExp = - pDdiTable->pfnAppendMembufferCopyRectExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - ur_validation_layer::urCommandBufferAppendMembufferCopyRectExp; + dditable.pfnAppendMemBufferReadRectExp = + pDdiTable->pfnAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + ur_validation_layer::urCommandBufferAppendMemBufferReadRectExp; - dditable.pfnAppendMembufferWriteRectExp = - pDdiTable->pfnAppendMembufferWriteRectExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - ur_validation_layer::urCommandBufferAppendMembufferWriteRectExp; + dditable.pfnAppendMemBufferFillExp = pDdiTable->pfnAppendMemBufferFillExp; + pDdiTable->pfnAppendMemBufferFillExp = + ur_validation_layer::urCommandBufferAppendMemBufferFillExp; - dditable.pfnAppendMembufferReadRectExp = - pDdiTable->pfnAppendMembufferReadRectExp; - pDdiTable->pfnAppendMembufferReadRectExp = - ur_validation_layer::urCommandBufferAppendMembufferReadRectExp; + 
dditable.pfnAppendUSMPrefetchExp = pDdiTable->pfnAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMPrefetchExp = + ur_validation_layer::urCommandBufferAppendUSMPrefetchExp; + + dditable.pfnAppendUSMAdviseExp = pDdiTable->pfnAppendUSMAdviseExp; + pDdiTable->pfnAppendUSMAdviseExp = + ur_validation_layer::urCommandBufferAppendUSMAdviseExp; dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_validation_layer::urCommandBufferEnqueueExp; @@ -7676,6 +8430,42 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EnqueueExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_enqueue_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_validation_layer::context.urDdiTable.EnqueueExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_validation_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_validation_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnCooperativeKernelLaunchExp = + pDdiTable->pfnCooperativeKernelLaunchExp; + pDdiTable->pfnCooperativeKernelLaunchExp = + ur_validation_layer::urEnqueueCooperativeKernelLaunchExp; + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Event table /// with current process' addresses @@ -7814,6 +8604,42 @@ UR_DLLEXPORT 
ur_result_t UR_APICALL urGetKernelProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's KernelExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_kernel_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_validation_layer::context.urDdiTable.KernelExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_validation_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_validation_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnSuggestMaxCooperativeGroupCountExp = + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp; + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = + ur_validation_layer::urKernelSuggestMaxCooperativeGroupCountExp; + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Mem table /// with current process' addresses @@ -8450,7 +9276,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( } ur_result_t context_t::init(ur_dditable_t *dditable, - const std::set &enabledLayerNames) { + const std::set &enabledLayerNames, + codeloc_data) { ur_result_t result = UR_RESULT_SUCCESS; if (enabledLayerNames.count(nameFullValidation)) { @@ -8494,6 +9321,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Enqueue); } + if (UR_RESULT_SUCCESS == result) { + result = 
ur_validation_layer::urGetEnqueueExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->EnqueueExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_validation_layer::urGetEventProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Event); @@ -8504,6 +9336,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Kernel); } + if (UR_RESULT_SUCCESS == result) { + result = ur_validation_layer::urGetKernelExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->KernelExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_validation_layer::urGetMemProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Mem); @@ -8567,4 +9404,14 @@ ur_result_t context_t::init(ur_dditable_t *dditable, return result; } +ur_result_t context_t::tearDown() { + ur_result_t result = UR_RESULT_SUCCESS; + + if (enableLeakChecking) { + refCountContext.logInvalidReferences(); + refCountContext.clear(); + } + return result; +} + } // namespace ur_validation_layer diff --git a/source/loader/layers/validation/ur_validation_layer.cpp b/source/loader/layers/validation/ur_validation_layer.cpp index 5cd3f8c13a..3e040fcc50 100644 --- a/source/loader/layers/validation/ur_validation_layer.cpp +++ b/source/loader/layers/validation/ur_validation_layer.cpp @@ -11,6 +11,8 @@ */ #include "ur_validation_layer.hpp" +#include + namespace ur_validation_layer { context_t context; @@ -20,4 +22,127 @@ context_t::context_t() : logger(logger::create_logger("validation")) {} /////////////////////////////////////////////////////////////////////////////// context_t::~context_t() {} +// Some adapters don't support all the queries yet, we should be lenient and +// just not attempt to validate in those cases to preserve functionality. 
+#define RETURN_ON_FAILURE(result) \ + if (result == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION || \ + result == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) \ + return UR_RESULT_SUCCESS; \ + if (result != UR_RESULT_SUCCESS) { \ + context.logger.error("Unexpected non-success result code from {}", \ + #result); \ + assert(0); \ + return result; \ + } + +ur_result_t bounds(ur_mem_handle_t buffer, size_t offset, size_t size) { + auto pfnMemGetInfo = context.urDdiTable.Mem.pfnGetInfo; + + size_t bufferSize = 0; + RETURN_ON_FAILURE(pfnMemGetInfo(buffer, UR_MEM_INFO_SIZE, + sizeof(bufferSize), &bufferSize, nullptr)); + + if (size + offset > bufferSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t bounds(ur_mem_handle_t buffer, ur_rect_offset_t offset, + ur_rect_region_t region) { + auto pfnMemGetInfo = context.urDdiTable.Mem.pfnGetInfo; + + size_t bufferSize = 0; + RETURN_ON_FAILURE(pfnMemGetInfo(buffer, UR_MEM_INFO_SIZE, + sizeof(bufferSize), &bufferSize, nullptr)); + + if (offset.x >= bufferSize || offset.y >= bufferSize || + offset.z >= bufferSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if ((region.width + offset.x) * (region.height + offset.y) * + (region.depth + offset.z) > + bufferSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t bounds(ur_queue_handle_t queue, const void *ptr, size_t offset, + size_t size) { + auto pfnQueueGetInfo = context.urDdiTable.Queue.pfnGetInfo; + auto pfnUSMGetMemAllocInfo = context.urDdiTable.USM.pfnGetMemAllocInfo; + + ur_context_handle_t urContext = nullptr; + RETURN_ON_FAILURE(pfnQueueGetInfo(queue, UR_QUEUE_INFO_CONTEXT, + sizeof(ur_context_handle_t), &urContext, + nullptr)); + ur_usm_type_t usmType = UR_USM_TYPE_UNKNOWN; + RETURN_ON_FAILURE( + pfnUSMGetMemAllocInfo(urContext, ptr, UR_USM_ALLOC_INFO_TYPE, + sizeof(usmType), &usmType, nullptr)); + + // We can't reliably get size info about pointers that didn't come from the + // USM alloc 
entry points. + if (usmType == UR_USM_TYPE_UNKNOWN) { + return UR_RESULT_SUCCESS; + } + + size_t allocSize = 0; + RETURN_ON_FAILURE( + pfnUSMGetMemAllocInfo(urContext, ptr, UR_USM_ALLOC_INFO_SIZE, + sizeof(allocSize), &allocSize, nullptr)); + + if (size + offset > allocSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t boundsImage(ur_mem_handle_t image, ur_rect_offset_t origin, + ur_rect_region_t region) { + auto pfnMemImageGetInfo = context.urDdiTable.Mem.pfnImageGetInfo; + + size_t width = 0; + RETURN_ON_FAILURE(pfnMemImageGetInfo(image, UR_IMAGE_INFO_WIDTH, + sizeof(width), &width, nullptr)); + if (region.width + origin.x > width) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + size_t height = 0; + RETURN_ON_FAILURE(pfnMemImageGetInfo(image, UR_IMAGE_INFO_HEIGHT, + sizeof(height), &height, nullptr)); + + // Some adapters return a height and depth of 0 for images that don't have + // those dimensions, but regions for enqueue operations must set these to + // 1, so we need to make this adjustment to properly validate. 
+ if (height == 0) { + height = 1; + } + + if (region.height + origin.y > height) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + size_t depth = 0; + RETURN_ON_FAILURE(pfnMemImageGetInfo(image, UR_IMAGE_INFO_DEPTH, + sizeof(depth), &depth, nullptr)); + if (depth == 0) { + depth = 1; + } + + if (region.depth + origin.z > depth) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + return UR_RESULT_SUCCESS; +} + +#undef RETURN_ON_FAILURE + } // namespace ur_validation_layer diff --git a/source/loader/layers/validation/ur_validation_layer.hpp b/source/loader/layers/validation/ur_validation_layer.hpp index 3201a5345e..d29b64230e 100644 --- a/source/loader/layers/validation/ur_validation_layer.hpp +++ b/source/loader/layers/validation/ur_validation_layer.hpp @@ -34,7 +34,9 @@ class __urdlllocal context_t : public proxy_layer_context_t { return {nameFullValidation, nameParameterValidation, nameLeakChecking}; } ur_result_t init(ur_dditable_t *dditable, - const std::set &enabledLayerNames) override; + const std::set &enabledLayerNames, + codeloc_data codelocData) override; + ur_result_t tearDown() override; private: const std::string nameFullValidation = "UR_LAYER_FULL_VALIDATION"; @@ -42,6 +44,17 @@ class __urdlllocal context_t : public proxy_layer_context_t { const std::string nameLeakChecking = "UR_LAYER_LEAK_CHECKING"; }; +ur_result_t bounds(ur_mem_handle_t buffer, size_t offset, size_t size); + +ur_result_t bounds(ur_mem_handle_t buffer, ur_rect_offset_t offset, + ur_rect_region_t region); + +ur_result_t bounds(ur_queue_handle_t queue, const void *ptr, size_t offset, + size_t size); + +ur_result_t boundsImage(ur_mem_handle_t image, ur_rect_offset_t origin, + ur_rect_region_t region); + extern context_t context; } // namespace ur_validation_layer diff --git a/source/loader/ur_adapter_registry.hpp b/source/loader/ur_adapter_registry.hpp index 877206c062..3cfac34647 100644 --- a/source/loader/ur_adapter_registry.hpp +++ b/source/loader/ur_adapter_registry.hpp @@ -41,7 +41,8 
@@ class AdapterRegistry { } if (exists) { - adaptersLoadPaths.emplace_back(std::vector{path}); + adaptersLoadPaths.emplace_back( + std::vector{std::move(path)}); } else { logger::warning( "Detected nonexistent path {} in environmental " @@ -113,10 +114,11 @@ class AdapterRegistry { // to load the adapter. std::vector> adaptersLoadPaths; - static constexpr std::array knownAdapterNames{ + static constexpr std::array knownAdapterNames{ MAKE_LIBRARY_NAME("ur_adapter_level_zero", "0"), - MAKE_LIBRARY_NAME("ur_adapter_cuda", "0"), - MAKE_LIBRARY_NAME("ur_adapter_hip", "0")}; + MAKE_LIBRARY_NAME("ur_adapter_hip", "0"), + MAKE_LIBRARY_NAME("ur_adapter_opencl", "0"), + MAKE_LIBRARY_NAME("ur_adapter_cuda", "0")}; std::optional> getEnvAdapterSearchPaths() { std::optional> pathStringsOpt; @@ -163,12 +165,12 @@ class AdapterRegistry { auto adapterNamePathOpt = getAdapterNameAsPath(adapterName); if (adapterNamePathOpt.has_value()) { - auto adapterNamePath = adapterNamePathOpt.value(); + const auto &adapterNamePath = adapterNamePathOpt.value(); loadPaths.emplace_back(adapterNamePath); } if (loaderLibPathOpt.has_value()) { - auto loaderLibPath = loaderLibPathOpt.value(); + const auto &loaderLibPath = loaderLibPathOpt.value(); loadPaths.emplace_back(loaderLibPath / adapterName); } diff --git a/source/loader/ur_codeloc.hpp b/source/loader/ur_codeloc.hpp new file mode 100644 index 0000000000..176ba0b13c --- /dev/null +++ b/source/loader/ur_codeloc.hpp @@ -0,0 +1,35 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file ur_codeloc.hpp + * + */ + +#ifndef UR_CODELOC_HPP +#define UR_CODELOC_HPP 1 + +#include "ur_api.h" +#include + +struct codeloc_data { + codeloc_data() { + codelocCb = nullptr; + codelocUserdata = nullptr; + } + ur_code_location_callback_t codelocCb; + void *codelocUserdata; + + std::optional get_codeloc() { + if (!codelocCb) { + return std::nullopt; + } + return codelocCb(codelocUserdata); + } +}; + +#endif /* UR_CODELOC_HPP */ diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 649fc0ad88..5d7df7e672 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -33,41 +33,6 @@ ur_exp_interop_mem_factory_t ur_exp_interop_mem_factory; ur_exp_interop_semaphore_factory_t ur_exp_interop_semaphore_factory; ur_exp_command_buffer_factory_t ur_exp_command_buffer_factory; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urInit -__urdlllocal ur_result_t UR_APICALL urInit( - ur_device_init_flags_t device_flags, ///< [in] device initialization flags. - ///< must be 0 (default) or a combination of ::ur_device_init_flag_t. - ur_loader_config_handle_t - hLoaderConfig ///< [in][optional] Handle of loader config handle. 
-) { - ur_result_t result = UR_RESULT_SUCCESS; - - for (auto &platform : context->platforms) { - if (platform.initStatus != UR_RESULT_SUCCESS) { - continue; - } - platform.initStatus = - platform.dditable.ur.Global.pfnInit(device_flags, hLoaderConfig); - } - - return result; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTearDown -__urdlllocal ur_result_t UR_APICALL urTearDown( - void *pParams ///< [in] pointer to tear down parameters -) { - ur_result_t result = UR_RESULT_SUCCESS; - - for (auto &platform : context->platforms) { - platform.dditable.ur.Global.pfnTearDown(pParams); - } - - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urAdapterGet __urdlllocal ur_result_t UR_APICALL urAdapterGet( @@ -100,6 +65,9 @@ __urdlllocal ur_result_t UR_APICALL urAdapterGet( break; } adapterIndex++; + if (adapterIndex == NumEntries) { + break; + } } } @@ -475,8 +443,8 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGet( ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices. - ///< If phDevices in not NULL then NumEntries should be greater than zero, - ///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE, + ///< If phDevices is not NULL, then NumEntries should be greater than zero. + ///< Otherwise ::UR_RESULT_ERROR_INVALID_SIZE ///< will be returned. ur_device_handle_t * phDevices, ///< [out][optional][range(0, NumEntries)] array of handle of devices. 
@@ -3884,7 +3852,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// @brief Intercept function for urEnqueueMemBufferRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -3948,7 +3917,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( /// @brief Intercept function for urEnqueueMemBufferWrite __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -4014,7 +3984,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( /// @brief Intercept function for urEnqueueMemBufferReadRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -4091,7 +4062,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// @brief Intercept function for urEnqueueMemBufferWriteRect __urdlllocal ur_result_t 
UR_APICALL urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer @@ -4170,9 +4142,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the src buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object size_t srcOffset, ///< [in] offset into hBufferSrc to begin copying from size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into size_t size, ///< [in] size in bytes of data being copied @@ -4237,9 +4211,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopyRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the source buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOrigin, 
region)] handle of the source buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the dest buffer object ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer ur_rect_region_t @@ -4315,10 +4291,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// @brief Intercept function for urEnqueueMemBufferFill __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object - const void *pPattern, ///< [in] pointer to the fill pattern - size_t patternSize, ///< [in] size in bytes of the pattern - size_t offset, ///< [in] offset into the buffer + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + const void *pPattern, ///< [in] pointer to the fill pattern + size_t patternSize, ///< [in] size in bytes of the pattern + size_t offset, ///< [in] offset into the buffer size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -4379,7 +4356,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( /// @brief Intercept function for urEnqueueMemImageRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image @@ -4448,7 +4426,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( /// @brief Intercept function for urEnqueueMemImageWrite __urdlllocal ur_result_t 
UR_APICALL urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t @@ -4517,9 +4496,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemImageCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImageSrc, ///< [in] handle of the src image object - ur_mem_handle_t hImageDst, ///< [in] handle of the dest image object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hImageSrc, ///< [in][bounds(srcOrigin, region)] handle of the src image object + ur_mem_handle_t + hImageDst, ///< [in][bounds(dstOrigin, region)] handle of the dest image object ur_rect_offset_t srcOrigin, ///< [in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D ///< image @@ -4591,7 +4572,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( /// @brief Intercept function for urEnqueueMemBufferMap __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t mapFlags, ///< [in] flags for read, write, readwrite mapping size_t offset, ///< [in] offset in bytes of the buffer region being mapped @@ -4718,7 +4700,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap( /// @brief Intercept function for urEnqueueUSMFill __urdlllocal ur_result_t 
UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - void *ptr, ///< [in] pointer to USM memory object + void *pMem, ///< [in][bounds(0, size)] pointer to USM memory object size_t patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. @@ -4758,7 +4740,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( // forward to device-platform result = - pfnUSMFill(hQueue, ptr, patternSize, pPattern, size, + pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitListLocal.data(), phEvent); if (UR_RESULT_SUCCESS != result) { @@ -4783,9 +4765,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object bool blocking, ///< [in] blocking or non-blocking copy - void *pDst, ///< [in] pointer to the destination USM memory object - const void *pSrc, ///< [in] pointer to the source USM memory object - size_t size, ///< [in] size in bytes to be copied + void * + pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void * + pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object + size_t size, ///< [in] size in bytes to be copied uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -4841,9 +4825,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMPrefetch __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be fetched + ur_queue_handle_t hQueue, ///< 
[in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -4899,9 +4884,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMAdvise __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be advised + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t advice, ///< [in] USM memory advice ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular @@ -4943,7 +4929,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( /// @brief Intercept function for urEnqueueUSMFill2D __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. - void *pMem, ///< [in] pointer to memory to be filled. + void * + pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. size_t pitch, ///< [in] the total width of the destination memory including padding. size_t @@ -5012,10 +4999,13 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. bool blocking, ///< [in] indicates if this operation should block the host. - void *pDst, ///< [in] pointer to memory where data will be copied. 
+ void * + pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. size_t dstPitch, ///< [in] the total width of the source memory including padding. - const void *pSrc, ///< [in] pointer to memory to be copied. + const void * + pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. size_t srcPitch, ///< [in] the total width of the source memory including padding. size_t width, ///< [in] the width in bytes of each row to be copied. @@ -5306,7 +5296,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( ///< events that must be complete before the host pipe write. ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. ur_event_handle_t * - phEvent ///< [out] returns an event object that identifies this write command + phEvent ///< [out][optional] returns an event object that identifies this write command ///< and can be used to query or queue a wait for this command to complete. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -5343,8 +5333,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( try { // convert platform handle to loader handle - *phEvent = reinterpret_cast( - ur_event_factory.getInstance(*phEvent, dditable)); + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + ur_event_factory.getInstance(*phEvent, dditable)); + } } catch (std::bad_alloc &) { result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } @@ -6380,8 +6372,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMemcpyUSMExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +/// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer 
object. void *pDst, ///< [in] Location the data will be copied to. @@ -6400,9 +6392,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( auto dditable = reinterpret_cast(hCommandBuffer) ->dditable; - auto pfnAppendMemcpyUSMExp = - dditable->ur.CommandBufferExp.pfnAppendMemcpyUSMExp; - if (nullptr == pfnAppendMemcpyUSMExp) { + auto pfnAppendUSMMemcpyExp = + dditable->ur.CommandBufferExp.pfnAppendUSMMemcpyExp; + if (nullptr == pfnAppendUSMMemcpyExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6412,7 +6404,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( ->handle; // forward to device-platform - result = pfnAppendMemcpyUSMExp(hCommandBuffer, pDst, pSrc, size, + result = pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6420,8 +6412,50 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferCopyExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +/// @brief Intercept function for urCommandBufferAppendUSMFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + void *pMemory, ///< [in] pointer to USM allocated memory to fill. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
+ ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommandBuffer) + ->dditable; + auto pfnAppendUSMFillExp = + dditable->ur.CommandBufferExp.pfnAppendUSMFillExp; + if (nullptr == pfnAppendUSMFillExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommandBuffer = + reinterpret_cast(hCommandBuffer) + ->handle; + + // forward to device-platform + result = pfnAppendUSMFillExp(hCommandBuffer, pMemory, pPattern, patternSize, + size, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -6442,9 +6476,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( auto dditable = reinterpret_cast(hCommandBuffer) ->dditable; - auto pfnAppendMembufferCopyExp = - dditable->ur.CommandBufferExp.pfnAppendMembufferCopyExp; - if (nullptr == pfnAppendMembufferCopyExp) { + auto pfnAppendMemBufferCopyExp = + dditable->ur.CommandBufferExp.pfnAppendMemBufferCopyExp; + if (nullptr == pfnAppendMemBufferCopyExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6460,7 +6494,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( hDstMem = reinterpret_cast(hDstMem)->handle; // forward to device-platform - result = pfnAppendMembufferCopyExp( + result = pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6468,8 +6502,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6490,9 +6524,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( auto dditable = reinterpret_cast(hCommandBuffer) ->dditable; - auto pfnAppendMembufferWriteExp = - dditable->ur.CommandBufferExp.pfnAppendMembufferWriteExp; - if (nullptr == pfnAppendMembufferWriteExp) { + auto pfnAppendMemBufferWriteExp = + dditable->ur.CommandBufferExp.pfnAppendMemBufferWriteExp; + if (nullptr == pfnAppendMemBufferWriteExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6505,7 +6539,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( hBuffer = reinterpret_cast(hBuffer)->handle; // forward to device-platform - result = pfnAppendMembufferWriteExp(hCommandBuffer, hBuffer, offset, size, + result = pfnAppendMemBufferWriteExp(hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6513,8 +6547,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferReadExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6534,9 +6568,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( auto dditable = reinterpret_cast(hCommandBuffer) ->dditable; - auto pfnAppendMembufferReadExp = - dditable->ur.CommandBufferExp.pfnAppendMembufferReadExp; - if (nullptr == pfnAppendMembufferReadExp) { + auto pfnAppendMemBufferReadExp = + dditable->ur.CommandBufferExp.pfnAppendMemBufferReadExp; + if (nullptr == pfnAppendMemBufferReadExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6549,7 +6583,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( hBuffer = reinterpret_cast(hBuffer)->handle; // forward to device-platform - result = pfnAppendMembufferReadExp(hCommandBuffer, hBuffer, offset, size, + result = pfnAppendMemBufferReadExp(hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6557,8 +6591,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferCopyRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -6586,9 +6620,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( auto dditable = reinterpret_cast(hCommandBuffer) ->dditable; - auto pfnAppendMembufferCopyRectExp = - dditable->ur.CommandBufferExp.pfnAppendMembufferCopyRectExp; - if (nullptr == pfnAppendMembufferCopyRectExp) { + auto pfnAppendMemBufferCopyRectExp = + dditable->ur.CommandBufferExp.pfnAppendMemBufferCopyRectExp; + if (nullptr == pfnAppendMemBufferCopyRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6604,7 +6638,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( hDstMem = reinterpret_cast(hDstMem)->handle; // forward to device-platform - result = pfnAppendMembufferCopyRectExp( + result = pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6613,8 +6647,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6648,9 +6682,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( auto dditable = reinterpret_cast(hCommandBuffer) ->dditable; - auto pfnAppendMembufferWriteRectExp = - dditable->ur.CommandBufferExp.pfnAppendMembufferWriteRectExp; - if (nullptr == pfnAppendMembufferWriteRectExp) { + auto pfnAppendMemBufferWriteRectExp = + dditable->ur.CommandBufferExp.pfnAppendMemBufferWriteRectExp; + if (nullptr == pfnAppendMemBufferWriteRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6663,7 +6697,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( hBuffer = reinterpret_cast(hBuffer)->handle; // forward to device-platform - result = pfnAppendMembufferWriteRectExp( + result = pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6672,8 +6706,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6705,9 +6739,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( auto dditable = reinterpret_cast(hCommandBuffer) ->dditable; - auto pfnAppendMembufferReadRectExp = - dditable->ur.CommandBufferExp.pfnAppendMembufferReadRectExp; - if (nullptr == pfnAppendMembufferReadRectExp) { + auto pfnAppendMemBufferReadRectExp = + dditable->ur.CommandBufferExp.pfnAppendMemBufferReadRectExp; + if (nullptr == pfnAppendMemBufferReadRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6720,7 +6754,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( hBuffer = reinterpret_cast(hBuffer)->handle; // forward to device-platform - result = pfnAppendMembufferReadRectExp( + result = pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6728,6 +6762,132 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t offset, ///< [in] offset into the buffer. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
+ ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommandBuffer) + ->dditable; + auto pfnAppendMemBufferFillExp = + dditable->ur.CommandBufferExp.pfnAppendMemBufferFillExp; + if (nullptr == pfnAppendMemBufferFillExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommandBuffer = + reinterpret_cast(hCommandBuffer) + ->handle; + + // convert loader handle to platform handle + hBuffer = reinterpret_cast(hBuffer)->handle; + + // forward to device-platform + result = pfnAppendMemBufferFillExp( + hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMPrefetchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to USM allocated memory to prefetch. + size_t size, ///< [in] size in bytes to be fetched. + ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommandBuffer) + ->dditable; + auto pfnAppendUSMPrefetchExp = + dditable->ur.CommandBufferExp.pfnAppendUSMPrefetchExp; + if (nullptr == pfnAppendUSMPrefetchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommandBuffer = + reinterpret_cast(hCommandBuffer) + ->handle; + + // forward to device-platform + result = pfnAppendUSMPrefetchExp(hCommandBuffer, pMemory, size, flags, + numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMAdviseExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to the USM memory object. + size_t size, ///< [in] size in bytes to be advised. + ur_usm_advice_flags_t advice, ///< [in] USM memory advice + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommandBuffer) + ->dditable; + auto pfnAppendUSMAdviseExp = + dditable->ur.CommandBufferExp.pfnAppendUSMAdviseExp; + if (nullptr == pfnAppendUSMAdviseExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommandBuffer = + reinterpret_cast(hCommandBuffer) + ->handle; + + // forward to device-platform + result = pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, advice, + numSyncPointsInWaitList, pSyncPointWaitList, + pSyncPoint); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( @@ -6793,6 +6953,109 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t + workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and + ///< work-group work-items + const size_t * + pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t * + pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t * + pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the number of local work-items forming a work-group that will + ///< 
execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnCooperativeKernelLaunchExp = + dditable->ur.EnqueueExp.pfnCooperativeKernelLaunchExp; + if (nullptr == pfnCooperativeKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handle to platform handle + hKernel = reinterpret_cast(hKernel)->handle; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + + // forward to device-platform + result = pfnCooperativeKernelLaunchExp( + hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numEventsInWaitList, phEventWaitListLocal.data(), + phEvent); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + ur_event_factory.getInstance(*phEvent, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp +__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hKernel)->dditable; + auto pfnSuggestMaxCooperativeGroupCountExp = + dditable->ur.KernelExp.pfnSuggestMaxCooperativeGroupCountExp; + if (nullptr == pfnSuggestMaxCooperativeGroupCountExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hKernel = reinterpret_cast(hKernel)->handle; + + // forward to device-platform + result = pfnSuggestMaxCooperativeGroupCountExp(hKernel, pGroupCountRet); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramBuildExp __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( @@ -7126,8 +7389,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( if (ur_loader::context->platforms.size() != 1 || ur_loader::context->forceIntercept) { // return pointers to loader's DDIs - pDdiTable->pfnInit = ur_loader::urInit; - pDdiTable->pfnTearDown = ur_loader::urTearDown; pDdiTable->pfnAdapterGet = ur_loader::urAdapterGet; pDdiTable->pfnAdapterRelease = ur_loader::urAdapterRelease; pDdiTable->pfnAdapterRetain = ur_loader::urAdapterRetain; @@ -7284,20 +7545,28 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnFinalizeExp = ur_loader::urCommandBufferFinalizeExp; pDdiTable->pfnAppendKernelLaunchExp = ur_loader::urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendMemcpyUSMExp = - ur_loader::urCommandBufferAppendMemcpyUSMExp; - 
pDdiTable->pfnAppendMembufferCopyExp = - ur_loader::urCommandBufferAppendMembufferCopyExp; - pDdiTable->pfnAppendMembufferWriteExp = - ur_loader::urCommandBufferAppendMembufferWriteExp; - pDdiTable->pfnAppendMembufferReadExp = - ur_loader::urCommandBufferAppendMembufferReadExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - ur_loader::urCommandBufferAppendMembufferCopyRectExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - ur_loader::urCommandBufferAppendMembufferWriteRectExp; - pDdiTable->pfnAppendMembufferReadRectExp = - ur_loader::urCommandBufferAppendMembufferReadRectExp; + pDdiTable->pfnAppendUSMMemcpyExp = + ur_loader::urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendUSMFillExp = + ur_loader::urCommandBufferAppendUSMFillExp; + pDdiTable->pfnAppendMemBufferCopyExp = + ur_loader::urCommandBufferAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferWriteExp = + ur_loader::urCommandBufferAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferReadExp = + ur_loader::urCommandBufferAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + ur_loader::urCommandBufferAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + ur_loader::urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + ur_loader::urCommandBufferAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferFillExp = + ur_loader::urCommandBufferAppendMemBufferFillExp; + pDdiTable->pfnAppendUSMPrefetchExp = + ur_loader::urCommandBufferAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMAdviseExp = + ur_loader::urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = ur_loader::urCommandBufferEnqueueExp; } else { // return pointers directly to platform's DDIs @@ -7453,6 +7722,61 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EnqueueExp table +/// with current 
process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_enqueue_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (ur_loader::context->version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + // Load the device-platform DDI tables + for (auto &platform : ur_loader::context->platforms) { + if (platform.initStatus != UR_RESULT_SUCCESS) { + continue; + } + auto getTable = reinterpret_cast( + ur_loader::LibLoader::getFunctionPtr( + platform.handle.get(), "urGetEnqueueExpProcAddrTable")); + if (!getTable) { + continue; + } + platform.initStatus = + getTable(version, &platform.dditable.ur.EnqueueExp); + } + + if (UR_RESULT_SUCCESS == result) { + if (ur_loader::context->platforms.size() != 1 || + ur_loader::context->forceIntercept) { + // return pointers to loader's DDIs + pDdiTable->pfnCooperativeKernelLaunchExp = + ur_loader::urEnqueueCooperativeKernelLaunchExp; + } else { + // return pointers directly to platform's DDIs + *pDdiTable = + ur_loader::context->platforms.front().dditable.ur.EnqueueExp; + } + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Event table /// with current process' addresses @@ -7583,6 +7907,61 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's KernelExp table +/// with current process' addresses +/// +/// @returns +/// 
- ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_kernel_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (ur_loader::context->version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + // Load the device-platform DDI tables + for (auto &platform : ur_loader::context->platforms) { + if (platform.initStatus != UR_RESULT_SUCCESS) { + continue; + } + auto getTable = reinterpret_cast( + ur_loader::LibLoader::getFunctionPtr( + platform.handle.get(), "urGetKernelExpProcAddrTable")); + if (!getTable) { + continue; + } + platform.initStatus = + getTable(version, &platform.dditable.ur.KernelExp); + } + + if (UR_RESULT_SUCCESS == result) { + if (ur_loader::context->platforms.size() != 1 || + ur_loader::context->forceIntercept) { + // return pointers to loader's DDIs + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = + ur_loader::urKernelSuggestMaxCooperativeGroupCountExp; + } else { + // return pointers directly to platform's DDIs + *pDdiTable = + ur_loader::context->platforms.front().dditable.ur.KernelExp; + } + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Mem table /// with current process' addresses diff --git a/source/loader/ur_lib.cpp b/source/loader/ur_lib.cpp index 964da234f1..34531ca8b1 100644 --- a/source/loader/ur_lib.cpp +++ b/source/loader/ur_lib.cpp @@ -55,15 +55,22 @@ void context_t::parseEnvEnabledLayers() { void context_t::initLayers() const { for (auto &l : layers) { if (l->isAvailable()) { - l->init(&context->urDdiTable, 
enabledLayerNames); + l->init(&context->urDdiTable, enabledLayerNames, codelocData); + } + } +} + +void context_t::tearDownLayers() const { + for (auto &l : layers) { + if (l->isAvailable()) { + l->tearDown(); } } } ////////////////////////////////////////////////////////////////////////// -__urdlllocal ur_result_t -context_t::Init(ur_device_init_flags_t device_flags, - ur_loader_config_handle_t hLoaderConfig) { +__urdlllocal ur_result_t context_t::Init( + ur_device_init_flags_t, ur_loader_config_handle_t hLoaderConfig) { ur_result_t result; const char *logger_name = "loader"; logger::init(logger_name); @@ -72,10 +79,11 @@ context_t::Init(ur_device_init_flags_t device_flags, result = ur_loader::context->init(); if (UR_RESULT_SUCCESS == result) { - result = urInit(); + result = urLoaderInit(); } if (hLoaderConfig) { + codelocData = hLoaderConfig->codelocData; enabledLayerNames.merge(hLoaderConfig->getEnabledLayerNames()); } @@ -174,4 +182,28 @@ ur_result_t urLoaderConfigEnableLayer(ur_loader_config_handle_t hLoaderConfig, hLoaderConfig->enabledLayers.insert(pLayerName); return UR_RESULT_SUCCESS; } + +ur_result_t urLoaderTearDown() { + context->tearDownLayers(); + + return UR_RESULT_SUCCESS; +} + +ur_result_t +urLoaderConfigSetCodeLocationCallback(ur_loader_config_handle_t hLoaderConfig, + ur_code_location_callback_t pfnCodeloc, + void *pUserData) { + if (!hLoaderConfig) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + if (!pfnCodeloc) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + hLoaderConfig->codelocData.codelocCb = pfnCodeloc; + hLoaderConfig->codelocData.codelocUserdata = pUserData; + + return UR_RESULT_SUCCESS; +} + } // namespace ur_lib diff --git a/source/loader/ur_lib.hpp b/source/loader/ur_lib.hpp index 1f0f23658b..9d1e02a67e 100644 --- a/source/loader/ur_lib.hpp +++ b/source/loader/ur_lib.hpp @@ -14,6 +14,7 @@ #define UR_LOADER_LIB_H 1 #include "ur_api.h" +#include "ur_codeloc.hpp" #include "ur_ddi.h" #include "ur_proxy_layer.hpp" #include 
"ur_util.hpp" @@ -42,6 +43,8 @@ struct ur_loader_config_handle_t_ { return refCount.load(std::memory_order_acquire); } std::set &getEnabledLayerNames() { return enabledLayers; } + + codeloc_data codelocData; }; namespace ur_lib { @@ -60,7 +63,7 @@ class __urdlllocal context_t { ur_result_t Init(ur_device_init_flags_t dflags, ur_loader_config_handle_t hLoaderConfig); - ur_result_t urInit(); + ur_result_t urLoaderInit(); ur_dditable_t urDdiTable = {}; const std::vector layers = { @@ -72,9 +75,12 @@ class __urdlllocal context_t { std::string availableLayers; std::set enabledLayerNames; + codeloc_data codelocData; + bool layerExists(const std::string &layerName) const; void parseEnvEnabledLayers(); void initLayers() const; + void tearDownLayers() const; }; extern context_t *context; @@ -87,5 +93,11 @@ ur_result_t urLoaderConfigGetInfo(ur_loader_config_handle_t hLoaderConfig, size_t *pPropSizeRet); ur_result_t urLoaderConfigEnableLayer(ur_loader_config_handle_t hLoaderConfig, const char *pLayerName); +ur_result_t urLoaderTearDown(); +ur_result_t +urLoaderConfigSetCodeLocationCallback(ur_loader_config_handle_t hLoaderConfig, + ur_code_location_callback_t pfnCodeloc, + void *pUserData); + } // namespace ur_lib #endif /* UR_LOADER_LIB_H */ diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index ccf1e1e2cf..80d1bc3fb6 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -158,21 +158,55 @@ ur_result_t UR_APICALL urLoaderConfigEnableLayer( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Initialize the 'oneAPI' adapter(s) +/// @brief Set a function callback for use by the loader to retrieve code +/// location information. +/// +/// @details +/// - The code location callback is optional and provides additional +/// information to the tracing layer about the entry point of the current +/// execution flow. 
+/// - This functionality can be used to match traced unified runtime +/// function calls with higher-level user calls. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hLoaderConfig` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pfnCodeloc` +ur_result_t UR_APICALL urLoaderConfigSetCodeLocationCallback( + ur_loader_config_handle_t + hLoaderConfig, ///< [in] Handle to config object the code location callback will be set for. + ur_code_location_callback_t + pfnCodeloc, ///< [in] Function pointer to code location callback. + void * + pUserData ///< [in][out][optional] pointer to data to be passed to callback. + ) try { + return ur_lib::urLoaderConfigSetCodeLocationCallback(hLoaderConfig, + pfnCodeloc, pUserData); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Initialize the 'oneAPI' loader /// /// @details /// - The application must call this function before calling any other /// function. /// - If this function is not called then all other functions will return /// ::UR_RESULT_ERROR_UNINITIALIZED. -/// - Only one instance of each adapter will be initialized per process. +/// - Only one instance of the loader will be initialized per process. /// - The application may call this function multiple times with different /// flags or environment variables enabled. /// - The application must call this function after forking new processes. /// Each forked process must call this function. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe for scenarios -/// where multiple libraries may initialize the adapter(s) simultaneously. +/// where multiple libraries may initialize the loader simultaneously. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -182,51 +216,38 @@ ur_result_t UR_APICALL urLoaderConfigEnableLayer( /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_DEVICE_INIT_FLAGS_MASK & device_flags` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY -ur_result_t UR_APICALL urInit( +ur_result_t UR_APICALL urLoaderInit( ur_device_init_flags_t device_flags, ///< [in] device initialization flags. ///< must be 0 (default) or a combination of ::ur_device_init_flag_t. ur_loader_config_handle_t hLoaderConfig ///< [in][optional] Handle of loader config handle. ) try { + + if (UR_DEVICE_INIT_FLAGS_MASK & device_flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + static ur_result_t result = UR_RESULT_SUCCESS; std::call_once(ur_lib::context->initOnce, [device_flags, hLoaderConfig]() { result = ur_lib::context->Init(device_flags, hLoaderConfig); }); - if (UR_RESULT_SUCCESS != result) { - return result; - } - - auto pfnInit = ur_lib::context->urDdiTable.Global.pfnInit; - if (nullptr == pfnInit) { - return UR_RESULT_ERROR_UNINITIALIZED; - } - - return pfnInit(device_flags, hLoaderConfig); + return result; } catch (...) 
{ return exceptionToResult(std::current_exception()); } /////////////////////////////////////////////////////////////////////////////// -/// @brief Tear down the 'oneAPI' instance and release all its resources +/// @brief Tear down the 'oneAPI' loader and release all its resources /// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED /// - ::UR_RESULT_ERROR_DEVICE_LOST /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pParams` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY -ur_result_t UR_APICALL urTearDown( - void *pParams ///< [in] pointer to tear down parameters - ) try { - auto pfnTearDown = ur_lib::context->urDdiTable.Global.pfnTearDown; - if (nullptr == pfnTearDown) { - return UR_RESULT_ERROR_UNINITIALIZED; - } - - return pfnTearDown(pParams); +ur_result_t UR_APICALL urLoaderTearDown(void) try { + return ur_lib::urLoaderTearDown(); } catch (...) { return exceptionToResult(std::current_exception()); } @@ -280,7 +301,9 @@ ur_result_t UR_APICALL urAdapterGet( /// /// @details /// - When the reference count of the adapter reaches zero, the adapter may -/// perform adapter-specififc resource teardown +/// perform adapter-specific resource teardown. Resources must be left in +/// a state where it is safe for the adapter to be subsequently reinitialized +/// with ::urAdapterGet /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -464,6 +487,7 @@ ur_result_t UR_APICALL urAdapterGetInfo( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phAdapters` /// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `NumEntries == 0 && phPlatforms != NULL` ur_result_t UR_APICALL urPlatformGet( ur_adapter_handle_t * phAdapters, ///< [in][range(0, NumAdapters)] array of adapters to query for platforms. 
@@ -723,14 +747,18 @@ ur_result_t UR_APICALL urPlatformGetBackendOption( /// + `NULL == hPlatform` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_DEVICE_TYPE_VPU < DeviceType` +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `NumEntries == 0 && phDevices != NULL` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NumEntries > 0 && phDevices == NULL` /// - ::UR_RESULT_ERROR_INVALID_VALUE ur_result_t UR_APICALL urDeviceGet( ur_platform_handle_t hPlatform, ///< [in] handle of the platform instance ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices. - ///< If phDevices in not NULL then NumEntries should be greater than zero, - ///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE, + ///< If phDevices is not NULL, then NumEntries should be greater than zero. + ///< Otherwise ::UR_RESULT_ERROR_INVALID_SIZE ///< will be returned. ur_device_handle_t * phDevices, ///< [out][optional][range(0, NumEntries)] array of handle of devices. 
@@ -905,6 +933,7 @@ ur_result_t UR_APICALL urDeviceRelease( /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pProperties` +/// + `NULL == pProperties->pProperties` /// - ::UR_RESULT_ERROR_DEVICE_PARTITION_FAILED /// - ::UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT ur_result_t UR_APICALL urDevicePartition( @@ -1124,6 +1153,8 @@ ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phDevices` /// + `NULL == phContext` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_CONTEXT_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY ur_result_t UR_APICALL urContextCreate( @@ -2098,6 +2129,8 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -2153,6 +2186,8 @@ ur_result_t UR_APICALL urUSMHostAlloc( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -2210,6 +2245,8 @@ ur_result_t UR_APICALL urUSMDeviceAlloc( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -2718,6 +2755,8 @@ ur_result_t UR_APICALL 
urVirtualMemGetInfo( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_PHYSICAL_MEM_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phPhysicalMem` /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -3791,6 +3830,8 @@ ur_result_t UR_APICALL urKernelSetArgSampler( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_MEM_FLAGS_MASK & pProperties->memoryAccess` /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX ur_result_t UR_APICALL urKernelSetArgMemObj( ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object @@ -4005,12 +4046,15 @@ ur_result_t UR_APICALL urQueueGetInfo( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_QUEUE_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phQueue` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_DEVICE -/// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES +/// + `pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_HIGH && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_LOW` +/// + `pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_BATCHED && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urQueueCreate( @@ -4323,6 +4367,8 @@ ur_result_t UR_APICALL urEventGetInfo( /// + `NULL == hEvent` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName` +/// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE +/// + If `hEvent`s associated 
queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`. /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `pPropValue && propSize == 0` /// - ::UR_RESULT_ERROR_INVALID_EVENT @@ -4538,6 +4584,8 @@ ur_result_t UR_APICALL urEventCreateWithNativeHandle( /// - The registered callback function will be called when the execution /// status of command associated with event changes to an execution status /// equal to or past the status specified by command_exec_status. +/// - `execStatus` must not be `UR_EXECUTION_INFO_QUEUED` as this is the +/// initial state of all events. /// - The application may call this function from simultaneous threads for /// the same context. /// - The implementation of this function should be thread-safe. @@ -4550,9 +4598,11 @@ ur_result_t UR_APICALL urEventCreateWithNativeHandle( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hEvent` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED < execStatus` +/// + `::UR_EXECUTION_INFO_QUEUED < execStatus` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pfnNotify` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + `execStatus == UR_EXECUTION_INFO_QUEUED` ur_result_t UR_APICALL urEventSetCallback( ur_event_handle_t hEvent, ///< [in] handle of the event object ur_execution_info_t execStatus, ///< [in] execution status of the event @@ -4783,7 +4833,8 @@ ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -4844,7 +4895,8 @@ ur_result_t 
UR_APICALL urEnqueueMemBufferRead( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -4917,7 +4969,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferWrite( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -5002,7 +5055,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer @@ -5075,9 +5129,11 @@ ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the src buffer object - ur_mem_handle_t hBufferDst, ///< [in] 
handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object size_t srcOffset, ///< [in] offset into hBufferSrc to begin copying from size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into size_t size, ///< [in] size in bytes of data being copied @@ -5141,9 +5197,11 @@ ur_result_t UR_APICALL urEnqueueMemBufferCopy( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the source buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOrigin, region)] handle of the source buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the dest buffer object ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer ur_rect_region_t @@ -5206,15 +5264,21 @@ ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `patternSize == 0 || size == 0` +/// + `patternSize > size` +/// + `(patternSize & (patternSize - 1)) != 0` +/// + `size % patternSize != 0` +/// + `offset % patternSize != 0` /// + If `offset + size` results in an out-of-bounds access. 
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object - const void *pPattern, ///< [in] pointer to the fill pattern - size_t patternSize, ///< [in] size in bytes of the pattern - size_t offset, ///< [in] offset into the buffer + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + const void *pPattern, ///< [in] pointer to the fill pattern + size_t patternSize, ///< [in] size in bytes of the pattern + size_t offset, ///< [in] offset into the buffer size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -5268,11 +5332,14 @@ ur_result_t UR_APICALL urEnqueueMemBufferFill( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image @@ -5333,11 +5400,14 @@ ur_result_t UR_APICALL urEnqueueMemImageRead( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t @@ -5394,12 +5464,16 @@ ur_result_t UR_APICALL urEnqueueMemImageWrite( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImageSrc, ///< [in] handle of the src image object - ur_mem_handle_t hImageDst, ///< [in] handle of the dest image object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hImageSrc, ///< [in][bounds(srcOrigin, region)] handle of the src image object + ur_mem_handle_t + hImageDst, ///< [in][bounds(dstOrigin, region)] handle of the dest image object ur_rect_offset_t srcOrigin, ///< [in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D ///< image @@ -5471,7 +5545,8 @@ ur_result_t UR_APICALL urEnqueueMemImageCopy( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] 
handle of the buffer object bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t mapFlags, ///< [in] flags for read, write, readwrite mapping size_t offset, ///< [in] offset in bytes of the buffer region being mapped @@ -5564,7 +5639,7 @@ ur_result_t UR_APICALL urEnqueueMemUnmap( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hQueue` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == ptr` +/// + `NULL == pMem` /// + `NULL == pPattern` /// - ::UR_RESULT_ERROR_INVALID_QUEUE /// - ::UR_RESULT_ERROR_INVALID_EVENT @@ -5583,7 +5658,7 @@ ur_result_t UR_APICALL urEnqueueMemUnmap( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - void *ptr, ///< [in] pointer to USM memory object + void *pMem, ///< [in][bounds(0, size)] pointer to USM memory object size_t patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. @@ -5606,7 +5681,7 @@ ur_result_t UR_APICALL urEnqueueUSMFill( return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnUSMFill(hQueue, ptr, patternSize, pPattern, size, + return pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, phEvent); } catch (...) 
{ return exceptionToResult(std::current_exception()); @@ -5640,9 +5715,11 @@ ur_result_t UR_APICALL urEnqueueUSMFill( ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object bool blocking, ///< [in] blocking or non-blocking copy - void *pDst, ///< [in] pointer to the destination USM memory object - const void *pSrc, ///< [in] pointer to the source USM memory object - size_t size, ///< [in] size in bytes to be copied + void * + pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void * + pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object + size_t size, ///< [in] size in bytes to be copied uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -5667,6 +5744,11 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to prefetch USM memory /// +/// @details +/// - Prefetching may not be supported for all devices or allocation types. +/// If memory prefetching is not supported, the prefetch hint will be +/// ignored. 
+/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED @@ -5691,9 +5773,10 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be fetched + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -5719,6 +5802,11 @@ ur_result_t UR_APICALL urEnqueueUSMPrefetch( /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to set USM memory advice /// +/// @details +/// - Not all memory advice hints may be supported for all devices or +/// allocation types. If a memory advice hint is not supported, it will be +/// ignored. 
+/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED @@ -5739,9 +5827,10 @@ ur_result_t UR_APICALL urEnqueueUSMPrefetch( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueUSMAdvise( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be advised + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t advice, ///< [in] USM memory advice ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular @@ -5790,7 +5879,8 @@ ur_result_t UR_APICALL urEnqueueUSMAdvise( /// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. - void *pMem, ///< [in] pointer to memory to be filled. + void * + pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. size_t pitch, ///< [in] the total width of the destination memory including padding. size_t @@ -5855,10 +5945,13 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. bool blocking, ///< [in] indicates if this operation should block the host. - void *pDst, ///< [in] pointer to memory where data will be copied. + void * + pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. size_t dstPitch, ///< [in] the total width of the source memory including padding. - const void *pSrc, ///< [in] pointer to memory to be copied. + const void * + pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. 
size_t srcPitch, ///< [in] the total width of the source memory including padding. size_t width, ///< [in] the width in bytes of each row to be copied. @@ -6059,7 +6152,6 @@ ur_result_t UR_APICALL urEnqueueReadHostPipe( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pipe_symbol` /// + `NULL == pSrc` -/// + `NULL == phEvent` /// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` @@ -6086,7 +6178,7 @@ ur_result_t UR_APICALL urEnqueueWriteHostPipe( ///< events that must be complete before the host pipe write. ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. ur_event_handle_t * - phEvent ///< [out] returns an event object that identifies this write command + phEvent ///< [out][optional] returns an event object that identifies this write command ///< and can be used to query or queue a wait for this command to complete. ) try { auto pfnWriteHostPipe = @@ -6128,6 +6220,8 @@ ur_result_t UR_APICALL urEnqueueWriteHostPipe( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// + `NULL == pResultPitch` @@ -7116,7 +7210,7 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. 
@@ -7129,19 +7223,74 @@ ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) try { - auto pfnAppendMemcpyUSMExp = - ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemcpyUSMExp; - if (nullptr == pfnAppendMemcpyUSMExp) { + auto pfnAppendUSMMemcpyExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; + if (nullptr == pfnAppendUSMMemcpyExp) { return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMemcpyUSMExp(hCommandBuffer, pDst, pSrc, size, + return pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } catch (...) { return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM fill command to a command-buffer object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// + `NULL == pPattern` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `patternSize == 0 || size == 0` +/// + `patternSize > size` +/// + `(patternSize & (patternSize - 1)) != 0` +/// + `size % patternSize != 0` +/// + If `size` is higher than the allocation size of `ptr` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL 
urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + void *pMemory, ///< [in] pointer to USM allocated memory to fill. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. + ) try { + auto pfnAppendUSMFillExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendUSMFillExp; + if (nullptr == pfnAppendUSMFillExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnAppendUSMFillExp(hCommandBuffer, pMemory, pPattern, patternSize, + size, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Append a memory copy command to a command-buffer object /// @@ -7162,7 +7311,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -7177,13 +7326,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) try { - auto pfnAppendMembufferCopyExp = - ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMembufferCopyExp; - if (nullptr == pfnAppendMembufferCopyExp) { + auto pfnAppendMemBufferCopyExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; + if (nullptr == pfnAppendMemBufferCopyExp) { return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMembufferCopyExp( + return pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } catch (...) { @@ -7211,7 +7360,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -7226,13 +7375,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) try { - auto pfnAppendMembufferWriteExp = - ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMembufferWriteExp; - if (nullptr == pfnAppendMembufferWriteExp) { + auto pfnAppendMemBufferWriteExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; + if (nullptr == pfnAppendMemBufferWriteExp) { return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMembufferWriteExp(hCommandBuffer, hBuffer, offset, size, + return pfnAppendMemBufferWriteExp(hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } catch (...) { @@ -7260,7 +7409,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -7274,13 +7423,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) try { - auto pfnAppendMembufferReadExp = - ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMembufferReadExp; - if (nullptr == pfnAppendMembufferReadExp) { + auto pfnAppendMemBufferReadExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; + if (nullptr == pfnAppendMemBufferReadExp) { return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMembufferReadExp(hCommandBuffer, hBuffer, offset, size, + return pfnAppendMemBufferReadExp(hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } catch (...) { @@ -7307,7 +7456,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -7329,14 +7478,14 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) try { - auto pfnAppendMembufferCopyRectExp = + auto pfnAppendMemBufferCopyRectExp = ur_lib::context->urDdiTable.CommandBufferExp - .pfnAppendMembufferCopyRectExp; - if (nullptr == pfnAppendMembufferCopyRectExp) { + .pfnAppendMemBufferCopyRectExp; + if (nullptr == pfnAppendMemBufferCopyRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMembufferCopyRectExp( + return pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -7365,7 +7514,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -7393,14 +7542,14 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) try { - auto pfnAppendMembufferWriteRectExp = + auto pfnAppendMemBufferWriteRectExp = ur_lib::context->urDdiTable.CommandBufferExp - .pfnAppendMembufferWriteRectExp; - if (nullptr == pfnAppendMembufferWriteRectExp) { + .pfnAppendMemBufferWriteRectExp; + if (nullptr == pfnAppendMemBufferWriteRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMembufferWriteRectExp( + return pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -7429,7 +7578,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -7455,14 +7604,14 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) try { - auto pfnAppendMembufferReadRectExp = + auto pfnAppendMemBufferReadRectExp = ur_lib::context->urDdiTable.CommandBufferExp - .pfnAppendMembufferReadRectExp; - if (nullptr == pfnAppendMembufferReadRectExp) { + .pfnAppendMemBufferReadRectExp; + if (nullptr == pfnAppendMemBufferReadRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMembufferReadRectExp( + return pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -7470,6 +7619,170 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a memory fill command to a command-buffer object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// + `NULL == hBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pPattern` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + If `offset + size` results in an out-of-bounds access. 
+/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t offset, ///< [in] offset into the buffer. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. + ) try { + auto pfnAppendMemBufferFillExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferFillExp; + if (nullptr == pfnAppendMemBufferFillExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnAppendMemBufferFillExp( + hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM Prefetch command to a command-buffer object +/// +/// @details +/// - Prefetching may not be supported for all devices or allocation types. +/// If memory prefetching is not supported, the prefetch hint will be +/// ignored. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_MIGRATION_FLAGS_MASK & flags` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `size == 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to USM allocated memory to prefetch. + size_t size, ///< [in] size in bytes to be fetched. + ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. 
+ ) try { + auto pfnAppendUSMPrefetchExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendUSMPrefetchExp; + if (nullptr == pfnAppendUSMPrefetchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnAppendUSMPrefetchExp(hCommandBuffer, pMemory, size, flags, + numSyncPointsInWaitList, pSyncPointWaitList, + pSyncPoint); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM Advise command to a command-buffer object +/// +/// @details +/// - Not all memory advice hints may be supported for all devices or +/// allocation types. If a memory advice hint is not supported, it will be +/// ignored. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_ADVICE_FLAGS_MASK & advice` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `size == 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to the USM memory object. + size_t size, ///< [in] size in bytes to be advised. 
+ ur_usm_advice_flags_t advice, ///< [in] USM memory advice + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. + ) try { + auto pfnAppendUSMAdviseExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendUSMAdviseExp; + if (nullptr == pfnAppendUSMAdviseExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, advice, + numSyncPointsInWaitList, pSyncPointWaitList, + pSyncPoint); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Submit a command-buffer for execution on a queue. /// @@ -7517,6 +7830,103 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue a command to execute a cooperative kernel +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pGlobalWorkOffset` +/// + `NULL == pGlobalWorkSize` +/// - ::UR_RESULT_ERROR_INVALID_QUEUE +/// - ::UR_RESULT_ERROR_INVALID_KERNEL +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t + workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and + ///< work-group work-items + const size_t * + pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t * + pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t * + pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the number of local work-items forming a work-group that will + ///< execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. 
+ ) try { + auto pfnCooperativeKernelLaunchExp = + ur_lib::context->urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp; + if (nullptr == pfnCooperativeKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnCooperativeKernelLaunchExp( + hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Query the maximum number of work groups for a cooperative kernel +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pGroupCountRet` +/// - ::UR_RESULT_ERROR_INVALID_KERNEL +ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups + ) try { + auto pfnSuggestMaxCooperativeGroupCountExp = + ur_lib::context->urDdiTable.KernelExp + .pfnSuggestMaxCooperativeGroupCountExp; + if (nullptr == pfnSuggestMaxCooperativeGroupCountExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnSuggestMaxCooperativeGroupCountExp(hKernel, pGroupCountRet); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Produces an executable program from one program, negates need for the /// linking step. 
diff --git a/source/loader/ur_libddi.cpp b/source/loader/ur_libddi.cpp index 2d0095f41d..bf28e09a71 100644 --- a/source/loader/ur_libddi.cpp +++ b/source/loader/ur_libddi.cpp @@ -17,7 +17,7 @@ namespace ur_lib { /////////////////////////////////////////////////////////////////////////////// -__urdlllocal ur_result_t context_t::urInit() { +__urdlllocal ur_result_t context_t::urLoaderInit() { ur_result_t result = UR_RESULT_SUCCESS; if (UR_RESULT_SUCCESS == result) { @@ -45,6 +45,11 @@ __urdlllocal ur_result_t context_t::urInit() { &urDdiTable.Enqueue); } + if (UR_RESULT_SUCCESS == result) { + result = urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT, + &urDdiTable.EnqueueExp); + } + if (UR_RESULT_SUCCESS == result) { result = urGetEventProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.Event); @@ -55,6 +60,11 @@ __urdlllocal ur_result_t context_t::urInit() { &urDdiTable.Kernel); } + if (UR_RESULT_SUCCESS == result) { + result = urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT, + &urDdiTable.KernelExp); + } + if (UR_RESULT_SUCCESS == result) { result = urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.Mem); } diff --git a/source/ur/ur.hpp b/source/ur/ur.hpp index 0437d719ba..da5ef0d81f 100644 --- a/source/ur/ur.hpp +++ b/source/ur/ur.hpp @@ -106,6 +106,7 @@ class ur_shared_mutex { // nop. class ur_mutex { std::mutex Mutex; + friend class ur_lock; public: void lock() { @@ -121,6 +122,17 @@ class ur_mutex { } }; +class ur_lock { + std::unique_lock Lock; + +public: + explicit ur_lock(ur_mutex &Mutex) { + if (!SingleThreadMode) { + Lock = std::unique_lock(Mutex.Mutex); + } + } +}; + /// SpinLock is a synchronization primitive, that uses atomic variable and /// causes thread trying acquire lock wait in loop while repeatedly check if /// the lock is available. 
diff --git a/source/ur_api.cpp b/source/ur_api.cpp index bc5b473cf9..3a7cebca8c 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -7,7 +7,7 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * * @file ur_api.cpp - * @version v0.7-r0 + * @version v0.9-r0 * */ #include "ur_api.h" @@ -149,21 +149,53 @@ ur_result_t UR_APICALL urLoaderConfigEnableLayer( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Initialize the 'oneAPI' adapter(s) +/// @brief Set a function callback for use by the loader to retrieve code +/// location information. +/// +/// @details +/// - The code location callback is optional and provides additional +/// information to the tracing layer about the entry point of the current +/// execution flow. +/// - This functionality can be used to match traced unified runtime +/// function calls with higher-level user calls. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hLoaderConfig` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pfnCodeloc` +ur_result_t UR_APICALL urLoaderConfigSetCodeLocationCallback( + ur_loader_config_handle_t + hLoaderConfig, ///< [in] Handle to config object the layer will be enabled for. + ur_code_location_callback_t + pfnCodeloc, ///< [in] Function pointer to code location callback. + void * + pUserData ///< [in][out][optional] pointer to data to be passed to callback. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Initialize the 'oneAPI' loader /// /// @details /// - The application must call this function before calling any other /// function. /// - If this function is not called then all other functions will return /// ::UR_RESULT_ERROR_UNINITIALIZED. 
-/// - Only one instance of each adapter will be initialized per process. +/// - Only one instance of the loader will be initialized per process. /// - The application may call this function multiple times with different /// flags or environment variables enabled. /// - The application must call this function after forking new processes. /// Each forked process must call this function. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe for scenarios -/// where multiple libraries may initialize the adapter(s) simultaneously. +/// where multiple libraries may initialize the loader simultaneously. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -173,7 +205,7 @@ ur_result_t UR_APICALL urLoaderConfigEnableLayer( /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_DEVICE_INIT_FLAGS_MASK & device_flags` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY -ur_result_t UR_APICALL urInit( +ur_result_t UR_APICALL urLoaderInit( ur_device_init_flags_t device_flags, ///< [in] device initialization flags. ///< must be 0 (default) or a combination of ::ur_device_init_flag_t. 
ur_loader_config_handle_t @@ -184,19 +216,15 @@ ur_result_t UR_APICALL urInit( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Tear down the 'oneAPI' instance and release all its resources +/// @brief Tear down the 'oneAPI' loader and release all its resources /// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED /// - ::UR_RESULT_ERROR_DEVICE_LOST /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pParams` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY -ur_result_t UR_APICALL urTearDown( - void *pParams ///< [in] pointer to tear down parameters -) { +ur_result_t UR_APICALL urLoaderTearDown(void) { ur_result_t result = UR_RESULT_SUCCESS; return result; } @@ -244,7 +272,9 @@ ur_result_t UR_APICALL urAdapterGet( /// /// @details /// - When the reference count of the adapter reaches zero, the adapter may -/// perform adapter-specififc resource teardown +/// perform adapter-specififc resource teardown. Resources must be left in +/// a state where it safe for the adapter to be subsequently reinitialized +/// with ::urAdapterGet /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -400,6 +430,7 @@ ur_result_t UR_APICALL urAdapterGetInfo( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phAdapters` /// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `NumEntries == 0 && phPlatforms != NULL` ur_result_t UR_APICALL urPlatformGet( ur_adapter_handle_t * phAdapters, ///< [in][range(0, NumAdapters)] array of adapters to query for platforms. 
@@ -618,14 +649,18 @@ ur_result_t UR_APICALL urPlatformGetBackendOption( /// + `NULL == hPlatform` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_DEVICE_TYPE_VPU < DeviceType` +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `NumEntries == 0 && phDevices != NULL` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NumEntries > 0 && phDevices == NULL` /// - ::UR_RESULT_ERROR_INVALID_VALUE ur_result_t UR_APICALL urDeviceGet( ur_platform_handle_t hPlatform, ///< [in] handle of the platform instance ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices. - ///< If phDevices in not NULL then NumEntries should be greater than zero, - ///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE, + ///< If phDevices is not NULL, then NumEntries should be greater than zero. + ///< Otherwise ::UR_RESULT_ERROR_INVALID_SIZE ///< will be returned. ur_device_handle_t * phDevices, ///< [out][optional][range(0, NumEntries)] array of handle of devices. 
@@ -776,6 +811,7 @@ ur_result_t UR_APICALL urDeviceRelease( /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pProperties` +/// + `NULL == pProperties->pProperties` /// - ::UR_RESULT_ERROR_DEVICE_PARTITION_FAILED /// - ::UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT ur_result_t UR_APICALL urDevicePartition( @@ -960,6 +996,8 @@ ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phDevices` /// + `NULL == phContext` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_CONTEXT_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY ur_result_t UR_APICALL urContextCreate( @@ -1781,6 +1819,8 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -1830,6 +1870,8 @@ ur_result_t UR_APICALL urUSMHostAlloc( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -1881,6 +1923,8 @@ ur_result_t UR_APICALL urUSMDeviceAlloc( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -2300,6 +2344,8 @@ ur_result_t UR_APICALL 
urVirtualMemGetInfo( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_PHYSICAL_MEM_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phPhysicalMem` /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -3199,6 +3245,8 @@ ur_result_t UR_APICALL urKernelSetArgSampler( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_MEM_FLAGS_MASK & pProperties->memoryAccess` /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX ur_result_t UR_APICALL urKernelSetArgMemObj( ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object @@ -3379,12 +3427,15 @@ ur_result_t UR_APICALL urQueueGetInfo( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_QUEUE_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phQueue` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_DEVICE -/// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES +/// + `pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_HIGH && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_LOW` +/// + `pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_BATCHED && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urQueueCreate( @@ -3646,6 +3697,8 @@ ur_result_t UR_APICALL urEventGetInfo( /// + `NULL == hEvent` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName` +/// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE +/// + If `hEvent`s associated 
queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`. /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `pPropValue && propSize == 0` /// - ::UR_RESULT_ERROR_INVALID_EVENT @@ -3820,6 +3873,8 @@ ur_result_t UR_APICALL urEventCreateWithNativeHandle( /// - The registered callback function will be called when the execution /// status of command associated with event changes to an execution status /// equal to or past the status specified by command_exec_status. +/// - `execStatus` must not be `UR_EXECUTION_INFO_QUEUED` as this is the +/// initial state of all events. /// - The application may call this function from simultaneous threads for /// the same context. /// - The implementation of this function should be thread-safe. @@ -3832,9 +3887,11 @@ ur_result_t UR_APICALL urEventCreateWithNativeHandle( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hEvent` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED < execStatus` +/// + `::UR_EXECUTION_INFO_QUEUED < execStatus` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pfnNotify` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + `execStatus == UR_EXECUTION_INFO_QUEUED` ur_result_t UR_APICALL urEventSetCallback( ur_event_handle_t hEvent, ///< [in] handle of the event object ur_execution_info_t execStatus, ///< [in] execution status of the event @@ -4037,7 +4094,8 @@ ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -4090,7 +4148,8 @@ ur_result_t 
UR_APICALL urEnqueueMemBufferRead( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -4155,7 +4214,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferWrite( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -4230,7 +4290,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer @@ -4293,9 +4354,11 @@ ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the src buffer object - ur_mem_handle_t hBufferDst, ///< [in] 
handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object size_t srcOffset, ///< [in] offset into hBufferSrc to begin copying from size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into size_t size, ///< [in] size in bytes of data being copied @@ -4350,9 +4413,11 @@ ur_result_t UR_APICALL urEnqueueMemBufferCopy( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the source buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOrigin, region)] handle of the source buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the dest buffer object ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer ur_rect_region_t @@ -4405,15 +4470,21 @@ ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `patternSize == 0 || size == 0` +/// + `patternSize > size` +/// + `(patternSize & (patternSize - 1)) != 0` +/// + `size % patternSize != 0` +/// + `offset % patternSize != 0` /// + If `offset + size` results in an out-of-bounds access. 
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object - const void *pPattern, ///< [in] pointer to the fill pattern - size_t patternSize, ///< [in] size in bytes of the pattern - size_t offset, ///< [in] offset into the buffer + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + const void *pPattern, ///< [in] pointer to the fill pattern + size_t patternSize, ///< [in] size in bytes of the pattern + size_t offset, ///< [in] offset into the buffer size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -4458,11 +4529,14 @@ ur_result_t UR_APICALL urEnqueueMemBufferFill( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image @@ -4515,11 +4589,14 @@ ur_result_t UR_APICALL urEnqueueMemImageRead( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t @@ -4567,12 +4644,16 @@ ur_result_t UR_APICALL urEnqueueMemImageWrite( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImageSrc, ///< [in] handle of the src image object - ur_mem_handle_t hImageDst, ///< [in] handle of the dest image object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hImageSrc, ///< [in][bounds(srcOrigin, region)] handle of the src image object + ur_mem_handle_t + hImageDst, ///< [in][bounds(dstOrigin, region)] handle of the dest image object ur_rect_offset_t srcOrigin, ///< [in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D ///< image @@ -4636,7 +4717,8 @@ ur_result_t UR_APICALL urEnqueueMemImageCopy( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] 
handle of the buffer object bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t mapFlags, ///< [in] flags for read, write, readwrite mapping size_t offset, ///< [in] offset in bytes of the buffer region being mapped @@ -4714,7 +4796,7 @@ ur_result_t UR_APICALL urEnqueueMemUnmap( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hQueue` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == ptr` +/// + `NULL == pMem` /// + `NULL == pPattern` /// - ::UR_RESULT_ERROR_INVALID_QUEUE /// - ::UR_RESULT_ERROR_INVALID_EVENT @@ -4733,7 +4815,7 @@ ur_result_t UR_APICALL urEnqueueMemUnmap( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - void *ptr, ///< [in] pointer to USM memory object + void *pMem, ///< [in][bounds(0, size)] pointer to USM memory object size_t patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. 
@@ -4783,9 +4865,11 @@ ur_result_t UR_APICALL urEnqueueUSMFill( ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object bool blocking, ///< [in] blocking or non-blocking copy - void *pDst, ///< [in] pointer to the destination USM memory object - const void *pSrc, ///< [in] pointer to the source USM memory object - size_t size, ///< [in] size in bytes to be copied + void * + pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void * + pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object + size_t size, ///< [in] size in bytes to be copied uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -4803,6 +4887,11 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to prefetch USM memory /// +/// @details +/// - Prefetching may not be supported for all devices or allocation types. +/// If memory prefetching is not supported, the prefetch hint will be +/// ignored. 
+/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED @@ -4827,9 +4916,10 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be fetched + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -4848,6 +4938,11 @@ ur_result_t UR_APICALL urEnqueueUSMPrefetch( /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to set USM memory advice /// +/// @details +/// - Not all memory advice hints may be supported for all devices or +/// allocation types. If a memory advice hint is not supported, it will be +/// ignored. 
+/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED @@ -4868,9 +4963,10 @@ ur_result_t UR_APICALL urEnqueueUSMPrefetch( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueUSMAdvise( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be advised + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t advice, ///< [in] USM memory advice ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular @@ -4913,7 +5009,8 @@ ur_result_t UR_APICALL urEnqueueUSMAdvise( /// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. - void *pMem, ///< [in] pointer to memory to be filled. + void * + pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. size_t pitch, ///< [in] the total width of the destination memory including padding. size_t @@ -4971,10 +5068,13 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. bool blocking, ///< [in] indicates if this operation should block the host. - void *pDst, ///< [in] pointer to memory where data will be copied. + void * + pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. size_t dstPitch, ///< [in] the total width of the source memory including padding. - const void *pSrc, ///< [in] pointer to memory to be copied. + const void * + pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. 
size_t srcPitch, ///< [in] the total width of the source memory including padding. size_t width, ///< [in] the width in bytes of each row to be copied. @@ -5142,7 +5242,6 @@ ur_result_t UR_APICALL urEnqueueReadHostPipe( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pipe_symbol` /// + `NULL == pSrc` -/// + `NULL == phEvent` /// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` @@ -5169,7 +5268,7 @@ ur_result_t UR_APICALL urEnqueueWriteHostPipe( ///< events that must be complete before the host pipe write. ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. ur_event_handle_t * - phEvent ///< [out] returns an event object that identifies this write command + phEvent ///< [out][optional] returns an event object that identifies this write command ///< and can be used to query or queue a wait for this command to complete. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -5203,6 +5302,8 @@ ur_result_t UR_APICALL urEnqueueWriteHostPipe( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// + `NULL == pResultPitch` @@ -6007,7 +6108,7 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. 
@@ -6024,6 +6125,52 @@ ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM fill command to a command-buffer object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// + `NULL == pPattern` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `patternSize == 0 || size == 0` +/// + `patternSize > size` +/// + `(patternSize & (patternSize - 1)) != 0` +/// + `size % patternSize != 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + void *pMemory, ///< [in] pointer to USM allocated memory to fill. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
+ ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Append a memory copy command to a command-buffer object /// @@ -6044,7 +6191,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. @@ -6084,7 +6231,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. @@ -6124,7 +6271,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6162,7 +6309,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. @@ -6209,7 +6356,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. @@ -6262,7 +6409,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6292,6 +6439,143 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a memory fill command to a command-buffer object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// + `NULL == hBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pPattern` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + If `offset + size` results in an out-of-bounds access. +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t offset, ///< [in] offset into the buffer. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM Prefetch command to a command-buffer object +/// +/// @details +/// - Prefetching may not be supported for all devices or allocation types. +/// If memory prefetching is not supported, the prefetch hint will be +/// ignored. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_MIGRATION_FLAGS_MASK & flags` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `size == 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to USM allocated memory to prefetch. + size_t size, ///< [in] size in bytes to be fetched. + ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
+ ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM Advise command to a command-buffer object +/// +/// @details +/// - Not all memory advice hints may be supported for all devices or +/// allocation types. If a memory advice hint is not supported, it will be +/// ignored. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_ADVICE_FLAGS_MASK & advice` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `size == 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to the USM memory object. + size_t size, ///< [in] size in bytes to be advised. + ur_usm_advice_flags_t advice, ///< [in] USM memory advice + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
+ const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Submit a command-buffer for execution on a queue. /// @@ -6331,6 +6615,86 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue a command to execute a cooperative kernel +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pGlobalWorkOffset` +/// + `NULL == pGlobalWorkSize` +/// - ::UR_RESULT_ERROR_INVALID_QUEUE +/// - ::UR_RESULT_ERROR_INVALID_KERNEL +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t + workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and + ///< work-group work-items + const size_t * + pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t * + pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t * + pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the number of local work-items forming a work-group that will + ///< execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Query the maximum number of work groups for a cooperative kernel +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pGroupCountRet` +/// - ::UR_RESULT_ERROR_INVALID_KERNEL +ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Produces an executable program from one program, negates need for the /// linking step. 
diff --git a/test/adapters/CMakeLists.txt b/test/adapters/CMakeLists.txt index d87d774ede..3191178606 100644 --- a/test/adapters/CMakeLists.txt +++ b/test/adapters/CMakeLists.txt @@ -37,10 +37,10 @@ function(add_adapter_test name) ENVIRONMENT "${args_ENVIRONMENT}") endfunction() -if(UR_BUILD_ADAPTER_CUDA) +if(UR_BUILD_ADAPTER_CUDA OR UR_BUILD_ADAPTER_ALL) add_subdirectory(cuda) endif() -if(UR_BUILD_ADAPTER_HIP) +if(UR_BUILD_ADAPTER_HIP OR UR_BUILD_ADAPTER_ALL) add_subdirectory(hip) endif() diff --git a/test/adapters/hip/test_context.cpp b/test/adapters/hip/test_context.cpp index 90c28b842f..c58dfc5af7 100644 --- a/test/adapters/hip/test_context.cpp +++ b/test/adapters/hip/test_context.cpp @@ -28,7 +28,9 @@ TEST_P(urHipContextTest, ActiveContexts) { hipCtx_t hipContext = nullptr; ASSERT_SUCCESS_HIP(hipCtxGetCurrent(&hipContext)); ASSERT_NE(hipContext, nullptr); - ASSERT_EQ(hipContext, context->getDevice()->getNativeContext()); + if (context->getDevices().size() == 1) { + ASSERT_EQ(hipContext, context->getDevices()[0]->getNativeContext()); + } ASSERT_SUCCESS(urQueueRelease(queue)); ASSERT_SUCCESS(urContextRelease(context)); @@ -60,7 +62,9 @@ TEST_P(urHipContextTest, ActiveContextsThreads) { // check that the first context is now the active HIP context ASSERT_SUCCESS_HIP(hipCtxGetCurrent(&current)); - ASSERT_EQ(current, context1->getDevice()->getNativeContext()); + if (context1->getDevices().size() == 1) { + ASSERT_EQ(current, context1->getDevices()[0]->getNativeContext()); + } ASSERT_SUCCESS(urQueueRelease(queue)); @@ -87,7 +91,9 @@ TEST_P(urHipContextTest, ActiveContextsThreads) { // check that the second context is now the active HIP context ASSERT_SUCCESS_HIP(hipCtxGetCurrent(&current)); - ASSERT_EQ(current, context2->getDevice()->getNativeContext()); + if (context2->getDevices().size() == 1) { + ASSERT_EQ(current, context2->getDevices()[0]->getNativeContext()); + } ASSERT_SUCCESS(urQueueRelease(queue)); }); diff --git a/test/conformance/CMakeLists.txt 
b/test/conformance/CMakeLists.txt index 5a898846ec..df80c02681 100644 --- a/test/conformance/CMakeLists.txt +++ b/test/conformance/CMakeLists.txt @@ -4,6 +4,8 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception set(UR_CONFORMANCE_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +option(UR_TEST_DEVICES_COUNT "Count of devices on which CTS will be run" 1) +option(UR_TEST_PLATFORMS_COUNT "Count of platforms on which CTS will be run" 1) function(add_test_adapter name adapter) set(TEST_TARGET_NAME test-${name}) @@ -12,7 +14,7 @@ function(add_test_adapter name adapter) add_test(NAME ${TEST_NAME} COMMAND ${CMAKE_COMMAND} -D TEST_FILE=${Python3_EXECUTABLE} - -D TEST_ARGS="${UR_CONFORMANCE_TEST_DIR}/cts_exe.py --test_command ${CMAKE_BINARY_DIR}/bin/${TEST_TARGET_NAME}" + -D TEST_ARGS="${UR_CONFORMANCE_TEST_DIR}/cts_exe.py --test_command ${CMAKE_BINARY_DIR}/bin/${TEST_TARGET_NAME} --test_devices_count=${UR_TEST_DEVICES_COUNT} --test_platforms_count=${UR_TEST_PLATFORMS_COUNT}" -D MODE=stdout -D MATCH_FILE=${CMAKE_CURRENT_SOURCE_DIR}/${name}_${adapter}.match -P ${PROJECT_SOURCE_DIR}/cmake/match.cmake @@ -39,25 +41,25 @@ function(add_conformance_test name) ${PROJECT_NAME}::common GTest::gtest_main) - if(UR_BUILD_ADAPTER_CUDA) + if(UR_BUILD_ADAPTER_CUDA OR UR_BUILD_ADAPTER_ALL) add_test_adapter(${name} adapter_cuda) endif() - if(UR_BUILD_ADAPTER_HIP) + if(UR_BUILD_ADAPTER_HIP OR UR_BUILD_ADAPTER_ALL) add_test_adapter(${name} adapter_hip) endif() - if(UR_BUILD_ADAPTER_L0) + if(UR_BUILD_ADAPTER_L0 OR UR_BUILD_ADAPTER_ALL) add_test_adapter(${name} adapter_level_zero) endif() - if(UR_BUILD_ADAPTER_OPENCL) + if(UR_BUILD_ADAPTER_OPENCL OR UR_BUILD_ADAPTER_ALL) add_test_adapter(${name} adapter_opencl) endif() - if(UR_BUILD_ADAPTER_NATIVE_CPU) + if(UR_BUILD_ADAPTER_NATIVE_CPU OR UR_BUILD_ADAPTER_ALL) add_test_adapter(${name} adapter_native_cpu) endif() if(NOT (UR_BUILD_ADAPTER_CUDA OR UR_BUILD_ADAPTER_HIP OR UR_BUILD_ADAPTER_L0 OR UR_BUILD_ADAPTER_OPENCL - OR UR_BUILD_ADAPTER_NATIVE_CPU)) 
+ OR UR_BUILD_ADAPTER_NATIVE_CPU OR UR_BUILD_ADAPTER_ALL)) add_test_adapter(${name} adapter_null) endif() endfunction() @@ -82,7 +84,7 @@ endfunction() add_subdirectory(testing) -add_subdirectory(runtime) +add_subdirectory(adapter) add_subdirectory(platform) add_subdirectory(device) add_subdirectory(context) diff --git a/test/conformance/README.md b/test/conformance/README.md index db90fc759b..e895a5299d 100644 --- a/test/conformance/README.md +++ b/test/conformance/README.md @@ -8,4 +8,15 @@ In the future, when all bugs are fixed, and the tests pass, this solution will no longer be necessary. When you fix any test, the match file must be updated Empty match files indicate that there are no failing tests -in a particular group for the corresponding adapter. \ No newline at end of file +in a particular group for the corresponding adapter. + +## How to set test device/platform name or limit the test devices/platforms count + +To limit how many devices/platforms you want to run the CTS on, +use CMake option UR_TEST_DEVICES_COUNT or +UR_TEST_PLATFORMS_COUNT. If you want to run the tests on +all available devices/platforms, set 0. The default value is 1. +If you run binaries for the tests, you can use the parameter +`--platforms_count=COUNT` or `--devices_count=COUNT`. +To set test device/platform name you want to run the CTS on, use +parameter `--platform=NAME` or `--device=NAME`. 
\ No newline at end of file diff --git a/test/conformance/runtime/CMakeLists.txt b/test/conformance/adapter/CMakeLists.txt similarity index 77% rename from test/conformance/runtime/CMakeLists.txt rename to test/conformance/adapter/CMakeLists.txt index 8c46abd82b..8d71a3cf6a 100644 --- a/test/conformance/runtime/CMakeLists.txt +++ b/test/conformance/adapter/CMakeLists.txt @@ -3,11 +3,9 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -add_conformance_test(runtime +add_conformance_test(adapter urAdapterGet.cpp urAdapterGetInfo.cpp urAdapterGetLastError.cpp urAdapterRetain.cpp - urAdapterRelease.cpp - urInit.cpp - urTearDown.cpp) + urAdapterRelease.cpp) diff --git a/test/conformance/runtime/runtime_adapter_cuda.match b/test/conformance/adapter/adapter_adapter_cuda.match similarity index 100% rename from test/conformance/runtime/runtime_adapter_cuda.match rename to test/conformance/adapter/adapter_adapter_cuda.match diff --git a/test/conformance/runtime/runtime_adapter_hip.match b/test/conformance/adapter/adapter_adapter_hip.match similarity index 100% rename from test/conformance/runtime/runtime_adapter_hip.match rename to test/conformance/adapter/adapter_adapter_hip.match diff --git a/test/conformance/runtime/runtime_adapter_level_zero.match b/test/conformance/adapter/adapter_adapter_level_zero.match similarity index 100% rename from test/conformance/runtime/runtime_adapter_level_zero.match rename to test/conformance/adapter/adapter_adapter_level_zero.match diff --git a/test/conformance/adapter/adapter_adapter_opencl.match b/test/conformance/adapter/adapter_adapter_opencl.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/runtime/fixtures.h b/test/conformance/adapter/fixtures.h similarity index 89% rename from test/conformance/runtime/fixtures.h rename to test/conformance/adapter/fixtures.h index 04f72617dd..31b2a2265d 100644 --- a/test/conformance/runtime/fixtures.h +++ 
b/test/conformance/adapter/fixtures.h @@ -14,15 +14,14 @@ struct urTest : ::testing::Test { ASSERT_SUCCESS(urLoaderConfigCreate(&loader_config)); ASSERT_SUCCESS(urLoaderConfigEnableLayer(loader_config, "UR_LAYER_FULL_VALIDATION")); - ASSERT_SUCCESS(urInit(device_flags, loader_config)); + ASSERT_SUCCESS(urLoaderInit(device_flags, loader_config)); } void TearDown() override { if (loader_config) { ASSERT_SUCCESS(urLoaderConfigRelease(loader_config)); } - ur_tear_down_params_t tear_down_params{}; - ASSERT_SUCCESS(urTearDown(&tear_down_params)); + ASSERT_SUCCESS(urLoaderTearDown()); } ur_loader_config_handle_t loader_config = nullptr; @@ -35,6 +34,7 @@ struct urAdapterTest : urTest { uint32_t adapter_count; ASSERT_SUCCESS(urAdapterGet(0, nullptr, &adapter_count)); + ASSERT_GT(adapter_count, 0); adapters.resize(adapter_count); ASSERT_SUCCESS(urAdapterGet(adapter_count, adapters.data(), nullptr)); } diff --git a/test/conformance/runtime/urAdapterGet.cpp b/test/conformance/adapter/urAdapterGet.cpp similarity index 100% rename from test/conformance/runtime/urAdapterGet.cpp rename to test/conformance/adapter/urAdapterGet.cpp diff --git a/test/conformance/runtime/urAdapterGetInfo.cpp b/test/conformance/adapter/urAdapterGetInfo.cpp similarity index 100% rename from test/conformance/runtime/urAdapterGetInfo.cpp rename to test/conformance/adapter/urAdapterGetInfo.cpp diff --git a/test/conformance/runtime/urAdapterGetLastError.cpp b/test/conformance/adapter/urAdapterGetLastError.cpp similarity index 100% rename from test/conformance/runtime/urAdapterGetLastError.cpp rename to test/conformance/adapter/urAdapterGetLastError.cpp diff --git a/test/conformance/runtime/urAdapterRelease.cpp b/test/conformance/adapter/urAdapterRelease.cpp similarity index 100% rename from test/conformance/runtime/urAdapterRelease.cpp rename to test/conformance/adapter/urAdapterRelease.cpp diff --git a/test/conformance/runtime/urAdapterRetain.cpp b/test/conformance/adapter/urAdapterRetain.cpp similarity 
index 100% rename from test/conformance/runtime/urAdapterRetain.cpp rename to test/conformance/adapter/urAdapterRetain.cpp diff --git a/test/conformance/context/context_adapter_hip.match b/test/conformance/context/context_adapter_hip.match index 129b8d392c..82d8d71397 100644 --- a/test/conformance/context/context_adapter_hip.match +++ b/test/conformance/context/context_adapter_hip.match @@ -1 +1,2 @@ urContextCreateWithNativeHandleTest.Success/AMD_HIP_BACKEND___{{.*}}_ +urContextGetInfoTestWithInfoParam.Success/AMD_HIP_BACKEND___{{.*}} diff --git a/test/conformance/context/context_adapter_level_zero.match b/test/conformance/context/context_adapter_level_zero.match index b66630d2e7..e43bf4d5b8 100644 --- a/test/conformance/context/context_adapter_level_zero.match +++ b/test/conformance/context/context_adapter_level_zero.match @@ -1,4 +1 @@ -urContextCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urContextGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT -{{OPT}}urContextGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_CONTEXT_INFO_USM_FILL2D_SUPPORT urContextSetExtendedDeleterTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ diff --git a/test/conformance/context/context_adapter_opencl.match b/test/conformance/context/context_adapter_opencl.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/context/urContextCreateWithNativeHandle.cpp b/test/conformance/context/urContextCreateWithNativeHandle.cpp index 04ef93c0f0..69771362b4 100644 --- a/test/conformance/context/urContextCreateWithNativeHandle.cpp +++ b/test/conformance/context/urContextCreateWithNativeHandle.cpp @@ -20,7 +20,7 @@ TEST_P(urContextCreateWithNativeHandleTest, Success) { // and perform some query on it to verify that it works. 
ur_context_handle_t ctx = nullptr; ur_context_native_properties_t props{}; - ASSERT_SUCCESS(urContextCreateWithNativeHandle(native_context, 0, nullptr, + ASSERT_SUCCESS(urContextCreateWithNativeHandle(native_context, 1, &device, &props, &ctx)); ASSERT_NE(ctx, nullptr); diff --git a/test/conformance/cts_exe.py b/test/conformance/cts_exe.py index ce3ca00a20..55ab134b07 100644 --- a/test/conformance/cts_exe.py +++ b/test/conformance/cts_exe.py @@ -20,9 +20,13 @@ parser = ArgumentParser() parser.add_argument("--test_command", help="Ctest test case") + parser.add_argument("--test_devices_count", type=str, help="Number of devices on which tests will be run") + parser.add_argument("--test_platforms_count", type=str, help="Number of platforms on which tests will be run") args = parser.parse_args() - result = subprocess.Popen([args.test_command, '--gtest_brief=1'], stdout = subprocess.PIPE, text = True) # nosec B603 + result = subprocess.Popen([args.test_command, '--gtest_brief=1', f'--devices_count={args.test_devices_count}', + f'--platforms_count={args.test_platforms_count}'], + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) # nosec B603 pat = re.compile(r'\[( )*FAILED( )*\]') output_list = [] diff --git a/test/conformance/device/device_adapter_cuda.match b/test/conformance/device/device_adapter_cuda.match index 961e7f591d..48e00debe4 100644 --- a/test/conformance/device/device_adapter_cuda.match +++ b/test/conformance/device/device_adapter_cuda.match @@ -1,4 +1 @@ -urDeviceGetTest.InvalidValueNumEntries -{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_SUPPORTED_PARTITIONS -{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_PARTITION_TYPE -{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT +{{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime diff --git a/test/conformance/device/device_adapter_hip.match b/test/conformance/device/device_adapter_hip.match index 711bfe1224..1e498301c8 100644 --- 
a/test/conformance/device/device_adapter_hip.match +++ b/test/conformance/device/device_adapter_hip.match @@ -1,5 +1,2 @@ -urDeviceCreateWithNativeHandleTest.Success -urDeviceGetTest.InvalidValueNumEntries +{{OPT}}urDeviceCreateWithNativeHandleTest.Success {{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime -urDeviceGetInfoTest.Success/UR_DEVICE_INFO_SUPPORTED_PARTITIONS -urDeviceGetInfoTest.Success/UR_DEVICE_INFO_PARTITION_TYPE diff --git a/test/conformance/device/device_adapter_level_zero.match b/test/conformance/device/device_adapter_level_zero.match index 3425d66967..9711e9152b 100644 --- a/test/conformance/device/device_adapter_level_zero.match +++ b/test/conformance/device/device_adapter_level_zero.match @@ -1,4 +1,3 @@ -urDeviceGetTest.InvalidValueNumEntries {{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime urDeviceGetInfoTest.Success/UR_DEVICE_INFO_IMAGE_SUPPORTED urDeviceGetInfoTest.Success/UR_DEVICE_INFO_GLOBAL_MEM_FREE @@ -9,8 +8,6 @@ urDeviceGetInfoTest.Success/UR_DEVICE_INFO_AVAILABLE urDeviceGetInfoTest.Success/UR_DEVICE_INFO_COMPILER_AVAILABLE urDeviceGetInfoTest.Success/UR_DEVICE_INFO_LINKER_AVAILABLE urDeviceGetInfoTest.Success/UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC -urDeviceGetInfoTest.Success/UR_DEVICE_INFO_SUPPORTED_PARTITIONS -urDeviceGetInfoTest.Success/UR_DEVICE_INFO_PARTITION_TYPE urDeviceGetInfoTest.Success/UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS urDeviceGetInfoTest.Success/UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH urDeviceGetInfoTest.Success/UR_DEVICE_INFO_IMAGE_SRGB diff --git a/test/conformance/device/device_adapter_opencl.match b/test/conformance/device/device_adapter_opencl.match new file mode 100644 index 0000000000..716ebd54fe --- /dev/null +++ b/test/conformance/device/device_adapter_opencl.match @@ -0,0 +1 @@ +urDeviceGetInfoTest.Success/UR_DEVICE_INFO_HALF_FP_CONFIG diff --git a/test/conformance/device/urDeviceGet.cpp b/test/conformance/device/urDeviceGet.cpp index 85a4818d09..e8aa356a58 100644 
--- a/test/conformance/device/urDeviceGet.cpp +++ b/test/conformance/device/urDeviceGet.cpp @@ -49,7 +49,7 @@ TEST_F(urDeviceGetTest, InvalidEnumerationDevicesType) { urDeviceGet(platform, UR_DEVICE_TYPE_FORCE_UINT32, 0, nullptr, &count)); } -TEST_F(urDeviceGetTest, InvalidValueNumEntries) { +TEST_F(urDeviceGetTest, InvalidSizeNumEntries) { uint32_t count = 0; ASSERT_SUCCESS( urDeviceGet(platform, UR_DEVICE_TYPE_ALL, 0, nullptr, &count)); @@ -59,3 +59,13 @@ TEST_F(urDeviceGetTest, InvalidValueNumEntries) { UR_RESULT_ERROR_INVALID_SIZE, urDeviceGet(platform, UR_DEVICE_TYPE_ALL, 0, devices.data(), nullptr)); } + +TEST_F(urDeviceGetTest, InvalidNullPointerDevices) { + uint32_t count = 0; + ASSERT_SUCCESS( + urDeviceGet(platform, UR_DEVICE_TYPE_ALL, 0, nullptr, &count)); + ASSERT_NE(count, 0); + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_NULL_POINTER, + urDeviceGet(platform, UR_DEVICE_TYPE_ALL, count, nullptr, nullptr)); +} diff --git a/test/conformance/device/urDeviceGetInfo.cpp b/test/conformance/device/urDeviceGetInfo.cpp index e5e9f7c310..757e09b6fa 100644 --- a/test/conformance/device/urDeviceGetInfo.cpp +++ b/test/conformance/device/urDeviceGetInfo.cpp @@ -240,6 +240,14 @@ INSTANTIATE_TEST_SUITE_P( return ss.str(); }); +bool doesReturnArray(ur_device_info_t info_type) { + if (info_type == UR_DEVICE_INFO_SUPPORTED_PARTITIONS || + info_type == UR_DEVICE_INFO_PARTITION_TYPE) { + return true; + } + return false; +} + TEST_P(urDeviceGetInfoTest, Success) { ur_device_info_t info_type = GetParam(); for (auto device : devices) { @@ -248,7 +256,11 @@ TEST_P(urDeviceGetInfoTest, Success) { urDeviceGetInfo(device, info_type, 0, nullptr, &size); if (result == UR_RESULT_SUCCESS) { + if (doesReturnArray(info_type) && size == 0) { + return; + } ASSERT_NE(size, 0); + if (const auto expected_size = device_info_size_map.find(info_type); expected_size != device_info_size_map.end()) { ASSERT_EQ(expected_size->second, size); diff --git 
a/test/conformance/enqueue/enqueue_adapter_level_zero.match b/test/conformance/enqueue/enqueue_adapter_level_zero.match index cef0029dc7..0290c664eb 100644 --- a/test/conformance/enqueue/enqueue_adapter_level_zero.match +++ b/test/conformance/enqueue/enqueue_adapter_level_zero.match @@ -1,2 +1,3 @@ {{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -Segmentation fault +{{OPT}}urEnqueueEventsWaitTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{Segmentation fault|Aborted}} diff --git a/test/conformance/enqueue/enqueue_adapter_opencl.match b/test/conformance/enqueue/enqueue_adapter_opencl.match new file mode 100644 index 0000000000..54a5ee3762 --- /dev/null +++ b/test/conformance/enqueue/enqueue_adapter_opencl.match @@ -0,0 +1,35 @@ +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.Success/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidSize/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueMemBufferReadRectTest.InvalidSize/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueMemBufferWriteRectTest.InvalidSize/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ 
+{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMFill2DNegativeTest.OutOfBounds/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMAdviseTest.InvalidSizeTooLarge/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullHandleQueue/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullPointer/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/Intel_R__OpenCL___{{.*}}_ diff --git a/test/conformance/enqueue/urEnqueueEventsWait.cpp b/test/conformance/enqueue/urEnqueueEventsWait.cpp index a80c884e87..0b7db213dc 100644 --- a/test/conformance/enqueue/urEnqueueEventsWait.cpp +++ b/test/conformance/enqueue/urEnqueueEventsWait.cpp @@ -80,4 +80,8 @@ TEST_P(urEnqueueEventsWaitTest, 
InvalidNullPtrEventWaitList) { ASSERT_EQ_RESULT(urEnqueueEventsWait(queue1, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueEventsWait(queue1, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } diff --git a/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp b/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp index ca465e937e..a107ed7b9c 100644 --- a/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp +++ b/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp @@ -89,4 +89,9 @@ TEST_P(urEnqueueEventsWaitWithBarrierTest, InvalidNullPtrEventWaitList) { ASSERT_EQ_RESULT( urEnqueueEventsWaitWithBarrier(queue1, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT( + urEnqueueEventsWaitWithBarrier(queue1, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } diff --git a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp index 0bca070da9..d9cb79e372 100644 --- a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp +++ b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp @@ -56,6 +56,12 @@ TEST_P(urEnqueueKernelLaunchTest, InvalidNullPtrEventWaitList) { &global_offset, &global_size, nullptr, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueKernelLaunch(queue, kernel, n_dimensions, + &global_offset, &global_size, + nullptr, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueKernelLaunchTest, InvalidWorkDimension) { @@ -210,3 +216,49 @@ TEST_P(urEnqueueKernelLaunchWithVirtualMemory, Success) { ASSERT_EQ(fill_val, data.at(i)); } } + +struct urEnqueueKernelLaunchMultiDeviceTest : public urEnqueueKernelLaunchTest { + void SetUp() override { + 
UUR_RETURN_ON_FATAL_FAILURE(urEnqueueKernelLaunchTest::SetUp()); + queues.reserve(uur::DevicesEnvironment::instance->devices.size()); + for (const auto &device : uur::DevicesEnvironment::instance->devices) { + ur_queue_handle_t queue = nullptr; + ASSERT_SUCCESS(urQueueCreate(this->context, device, 0, &queue)); + queues.push_back(queue); + } + } + + void TearDown() override { + for (const auto &queue : queues) { + EXPECT_SUCCESS(urQueueRelease(queue)); + } + UUR_RETURN_ON_FATAL_FAILURE(urEnqueueKernelLaunchTest::TearDown()); + } + + std::vector queues; +}; +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchMultiDeviceTest); + +TEST_P(urEnqueueKernelLaunchMultiDeviceTest, KernelLaunchReadDifferentQueues) { + ur_mem_handle_t buffer = nullptr; + AddBuffer1DArg(sizeof(val) * global_size, &buffer); + AddPodArg(val); + ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[0], kernel, n_dimensions, + &global_offset, &global_size, nullptr, + 0, nullptr, nullptr)); + + // Wait for the queue to finish executing. + EXPECT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, nullptr)); + + // Then the remaining queues do blocking reads from the buffer. Since the + // queues target different devices this checks that any devices memory has + // been synchronized. 
+ for (unsigned i = 1; i < queues.size(); ++i) { + const auto queue = queues[i]; + uint32_t output = 0; + ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, buffer, true, 0, + sizeof(output), &output, 0, + nullptr, nullptr)); + ASSERT_EQ(val, output) << "Result on queue " << i << " did not match!"; + } +} diff --git a/test/conformance/enqueue/urEnqueueMemBufferCopy.cpp b/test/conformance/enqueue/urEnqueueMemBufferCopy.cpp index 3eb2308702..f226e7597a 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferCopy.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferCopy.cpp @@ -74,6 +74,11 @@ TEST_P(urEnqueueMemBufferCopyTest, InvalidNullPtrEventWaitList) { ASSERT_EQ_RESULT(urEnqueueMemBufferCopy(queue, src_buffer, dst_buffer, 0, 0, size, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemBufferCopy(queue, src_buffer, dst_buffer, 0, 0, + size, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemBufferCopyTest, InvalidSize) { diff --git a/test/conformance/enqueue/urEnqueueMemBufferCopyRect.cpp b/test/conformance/enqueue/urEnqueueMemBufferCopyRect.cpp index f330503211..873c4953a7 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferCopyRect.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferCopyRect.cpp @@ -219,6 +219,13 @@ TEST_P(urEnqueueMemBufferCopyRectTest, InvalidNullPtrEventWaitList) { src_region, size, size, size, size, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemBufferCopyRect(queue, src_buffer, dst_buffer, + src_origin, dst_origin, + src_region, size, size, size, + size, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } using urEnqueueMemBufferCopyRectMultiDeviceTest = diff --git a/test/conformance/enqueue/urEnqueueMemBufferFill.cpp b/test/conformance/enqueue/urEnqueueMemBufferFill.cpp index cbeae5e85c..59e7e445c9 100644 --- 
a/test/conformance/enqueue/urEnqueueMemBufferFill.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferFill.cpp @@ -183,6 +183,13 @@ TEST_P(urEnqueueMemBufferFillNegativeTest, InvalidNullPtrEventWaitList) { sizeof(uint32_t), 0, size, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemBufferFill(queue, buffer, &pattern, + sizeof(uint32_t), 0, size, 1, + &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + ASSERT_SUCCESS(urEventRelease(validEvent)); } TEST_P(urEnqueueMemBufferFillNegativeTest, InvalidSize) { diff --git a/test/conformance/enqueue/urEnqueueMemBufferMap.cpp b/test/conformance/enqueue/urEnqueueMemBufferMap.cpp index 5ed576d6f3..fc44360c22 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferMap.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferMap.cpp @@ -187,6 +187,12 @@ TEST_P(urEnqueueMemBufferMapTest, InvalidNullPtrEventWaitList) { 0, size, 0, &validEvent, nullptr, &map), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemBufferMap(queue, buffer, true, + UR_MAP_FLAG_READ | UR_MAP_FLAG_WRITE, + 0, size, 1, &inv_evt, nullptr, &map), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemBufferMapTest, InvalidSize) { diff --git a/test/conformance/enqueue/urEnqueueMemBufferRead.cpp b/test/conformance/enqueue/urEnqueueMemBufferRead.cpp index 0192333783..6410d6feed 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferRead.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferRead.cpp @@ -49,6 +49,12 @@ TEST_P(urEnqueueMemBufferReadTest, InvalidNullPtrEventWaitList) { output.data(), 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemBufferRead(queue, buffer, true, 0, size, + output.data(), 1, &inv_evt, + nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } 
TEST_P(urEnqueueMemBufferReadTest, InvalidSize) { diff --git a/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp b/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp index ae0cc05332..7068985dfb 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp @@ -176,12 +176,20 @@ TEST_P(urEnqueueMemBufferReadRectTest, InvalidNullPtrEventWaitList) { host_offset, region, size, size, size, size, dst.data(), 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT( + urEnqueueMemBufferReadRect(queue, buffer, true, buffer_offset, + host_offset, region, size, size, size, size, + dst.data(), 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } using urEnqueueMemBufferReadRectMultiDeviceTest = uur::urMultiDeviceMemBufferQueueTest; -TEST_F(urEnqueueMemBufferReadRectMultiDeviceTest, WriteReadDifferentQueues) { +TEST_F(urEnqueueMemBufferReadRectMultiDeviceTest, + WriteRectReadDifferentQueues) { // First queue does a blocking write of 42 into the buffer. // Then a rectangular write the buffer as 1024x1x1 1D. 
std::vector input(count, 42); diff --git a/test/conformance/enqueue/urEnqueueMemBufferWrite.cpp b/test/conformance/enqueue/urEnqueueMemBufferWrite.cpp index 913d583058..aea6b8face 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferWrite.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferWrite.cpp @@ -61,6 +61,12 @@ TEST_P(urEnqueueMemBufferWriteTest, InvalidNullPtrEventWaitList) { input.data(), 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemBufferWrite(queue, buffer, true, 0, size, + input.data(), 1, &inv_evt, + nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemBufferWriteTest, InvalidSize) { diff --git a/test/conformance/enqueue/urEnqueueMemBufferWriteRect.cpp b/test/conformance/enqueue/urEnqueueMemBufferWriteRect.cpp index e41991e727..d3c7e5c7a3 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferWriteRect.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferWriteRect.cpp @@ -183,6 +183,13 @@ TEST_P(urEnqueueMemBufferWriteRectTest, InvalidNullPtrEventWaitList) { host_offset, region, size, size, size, size, src.data(), 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT( + urEnqueueMemBufferWriteRect(queue, buffer, true, buffer_offset, + host_offset, region, size, size, size, size, + src.data(), 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemBufferWriteRectTest, InvalidSize) { diff --git a/test/conformance/enqueue/urEnqueueMemImageCopy.cpp b/test/conformance/enqueue/urEnqueueMemImageCopy.cpp index d3cb5b566e..a22b4baa37 100644 --- a/test/conformance/enqueue/urEnqueueMemImageCopy.cpp +++ b/test/conformance/enqueue/urEnqueueMemImageCopy.cpp @@ -62,7 +62,7 @@ struct urEnqueueMemImageCopyTest void TearDown() override { if (srcImage) { - EXPECT_SUCCESS(urMemRelease(dstImage)); + EXPECT_SUCCESS(urMemRelease(srcImage)); } if 
(dstImage) { EXPECT_SUCCESS(urMemRelease(dstImage)); @@ -233,6 +233,12 @@ TEST_P(urEnqueueMemImageCopyTest, InvalidNullPtrEventWaitList) { {0, 0, 0}, size, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemImageCopy(queue, srcImage, dstImage, {0, 0, 0}, + {0, 0, 0}, size, 1, &inv_evt, + nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemImageCopyTest, InvalidSize) { @@ -245,3 +251,63 @@ TEST_P(urEnqueueMemImageCopyTest, InvalidSize) { {1, 0, 0}, size, 0, nullptr, nullptr)); } + +using urEnqueueMemImageCopyMultiDeviceTest = + uur::urMultiDeviceMemImageWriteTest; + +TEST_F(urEnqueueMemImageCopyMultiDeviceTest, CopyReadDifferentQueues) { + ur_mem_handle_t dstImage1D = nullptr; + ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, &format, + &desc1D, nullptr, &dstImage1D)); + ASSERT_SUCCESS(urEnqueueMemImageCopy(queues[0], image1D, dstImage1D, origin, + origin, region1D, 0, nullptr, + nullptr)); + + ur_mem_handle_t dstImage2D = nullptr; + ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, &format, + &desc2D, nullptr, &dstImage2D)); + ASSERT_SUCCESS(urEnqueueMemImageCopy(queues[0], image2D, dstImage2D, origin, + origin, region2D, 0, nullptr, + nullptr)); + + ur_mem_handle_t dstImage3D = nullptr; + ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, &format, + &desc3D, nullptr, &dstImage3D)); + ASSERT_SUCCESS(urEnqueueMemImageCopy(queues[0], image3D, dstImage3D, origin, + origin, region3D, 0, nullptr, + nullptr)); + + // Wait for the queue to finish executing. + EXPECT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, nullptr)); + + // The remaining queues do blocking reads from the image1D/2D/3D. Since the + // queues target different devices this checks that any devices memory has + // been synchronized. 
+ for (unsigned i = 1; i < queues.size(); ++i) { + const auto queue = queues[i]; + + std::vector output1D(width * 4, 42); + ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image1D, true, origin, + region1D, 0, 0, output1D.data(), 0, + nullptr, nullptr)); + + std::vector output2D(width * height * 4, 42); + ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image2D, true, origin, + region2D, 0, 0, output2D.data(), 0, + nullptr, nullptr)); + + std::vector output3D(width * height * depth * 4, 42); + ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image3D, true, origin, + region3D, 0, 0, output3D.data(), 0, + nullptr, nullptr)); + + ASSERT_EQ(input1D, output1D) + << "Result on queue " << i << " for 1D image did not match!"; + + ASSERT_EQ(input2D, output2D) + << "Result on queue " << i << " for 2D image did not match!"; + + ASSERT_EQ(input3D, output3D) + << "Result on queue " << i << " for 3D image did not match!"; + } +} diff --git a/test/conformance/enqueue/urEnqueueMemImageRead.cpp b/test/conformance/enqueue/urEnqueueMemImageRead.cpp index d40625c3e1..d4cf322958 100644 --- a/test/conformance/enqueue/urEnqueueMemImageRead.cpp +++ b/test/conformance/enqueue/urEnqueueMemImageRead.cpp @@ -69,6 +69,12 @@ TEST_P(urEnqueueMemImageReadTest, InvalidNullPtrEventWaitList) { region1D, 0, 0, output.data(), 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemImageRead(queue, image1D, true, origin, + region1D, 0, 0, output.data(), 1, + &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemImageReadTest, InvalidOrigin1D) { @@ -124,3 +130,39 @@ TEST_P(urEnqueueMemImageReadTest, InvalidRegion3D) { bad_region, 0, 0, output.data(), 0, nullptr, nullptr)); } + +using urEnqueueMemImageReadMultiDeviceTest = + uur::urMultiDeviceMemImageWriteTest; + +TEST_F(urEnqueueMemImageReadMultiDeviceTest, WriteReadDifferentQueues) { + // The remaining queues do blocking reads from the image1D/2D/3D. 
Since the + // queues target different devices this checks that any devices memory has + // been synchronized. + for (unsigned i = 1; i < queues.size(); ++i) { + const auto queue = queues[i]; + + std::vector output1D(width * 4, 42); + ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image1D, true, origin, + region1D, 0, 0, output1D.data(), 0, + nullptr, nullptr)); + + std::vector output2D(width * height * 4, 42); + ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image2D, true, origin, + region2D, 0, 0, output2D.data(), 0, + nullptr, nullptr)); + + std::vector output3D(width * height * depth * 4, 42); + ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image3D, true, origin, + region3D, 0, 0, output3D.data(), 0, + nullptr, nullptr)); + + ASSERT_EQ(input1D, output1D) + << "Result on queue " << i << " for 1D image did not match!"; + + ASSERT_EQ(input2D, output2D) + << "Result on queue " << i << " for 2D image did not match!"; + + ASSERT_EQ(input3D, output3D) + << "Result on queue " << i << " for 3D image did not match!"; + } +} diff --git a/test/conformance/enqueue/urEnqueueMemImageWrite.cpp b/test/conformance/enqueue/urEnqueueMemImageWrite.cpp index 7f7968bdff..76b5f0b4dd 100644 --- a/test/conformance/enqueue/urEnqueueMemImageWrite.cpp +++ b/test/conformance/enqueue/urEnqueueMemImageWrite.cpp @@ -66,6 +66,12 @@ TEST_P(urEnqueueMemImageWriteTest, InvalidNullPtrEventWaitList) { region1D, 0, 0, input.data(), 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemImageWrite(queue, image1D, true, origin, + region1D, 0, 0, input.data(), 1, + &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemImageWriteTest, InvalidOrigin1D) { diff --git a/test/conformance/enqueue/urEnqueueMemUnmap.cpp b/test/conformance/enqueue/urEnqueueMemUnmap.cpp index a205ded3b9..046d3088d9 100644 --- a/test/conformance/enqueue/urEnqueueMemUnmap.cpp +++ 
b/test/conformance/enqueue/urEnqueueMemUnmap.cpp @@ -50,4 +50,9 @@ TEST_P(urEnqueueMemUnmapTest, InvalidNullPtrEventWaitList) { ASSERT_EQ_RESULT( urEnqueueMemUnmap(queue, buffer, map, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT( + urEnqueueMemUnmap(queue, buffer, map, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } diff --git a/test/conformance/enqueue/urEnqueueReadHostPipe.cpp b/test/conformance/enqueue/urEnqueueReadHostPipe.cpp index 93e82b6531..379ee23f9d 100644 --- a/test/conformance/enqueue/urEnqueueReadHostPipe.cpp +++ b/test/conformance/enqueue/urEnqueueReadHostPipe.cpp @@ -76,4 +76,10 @@ TEST_P(urEnqueueReadHostPipeTest, InvalidEventWaitList) { urEnqueueReadHostPipe(queue, program, pipe_symbol, /*blocking*/ true, &buffer, size, 0, &validEvent, nullptr)); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueReadHostPipe(queue, program, pipe_symbol, + /*blocking*/ true, &buffer, size, 1, + &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } diff --git a/test/conformance/enqueue/urEnqueueUSMFill.cpp b/test/conformance/enqueue/urEnqueueUSMFill.cpp index e595056035..815c5bd2a5 100644 --- a/test/conformance/enqueue/urEnqueueUSMFill.cpp +++ b/test/conformance/enqueue/urEnqueueUSMFill.cpp @@ -203,4 +203,10 @@ TEST_P(urEnqueueUSMFillNegativeTest, InvalidEventWaitList) { ASSERT_EQ_RESULT(urEnqueueUSMFill(queue, ptr, pattern_size, pattern.data(), size, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueUSMFill(queue, ptr, pattern_size, pattern.data(), + size, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + ASSERT_SUCCESS(urEventRelease(validEvent)); } diff --git a/test/conformance/enqueue/urEnqueueUSMFill2D.cpp b/test/conformance/enqueue/urEnqueueUSMFill2D.cpp index 9cd5bc7591..37470d40e2 100644 --- 
a/test/conformance/enqueue/urEnqueueUSMFill2D.cpp +++ b/test/conformance/enqueue/urEnqueueUSMFill2D.cpp @@ -66,8 +66,8 @@ struct urEnqueueUSMFill2DTestWithParam nullptr, nullptr)); size_t pattern_index = 0; - for (size_t w = 0; w < width; ++w) { - for (size_t h = 0; h < height; ++h) { + for (size_t h = 0; h < height; ++h) { + for (size_t w = 0; w < width; ++w) { uint8_t *host_ptr = host_mem.data(); size_t index = (pitch * h) + w; ASSERT_TRUE((*(host_ptr + index) == pattern[pattern_index])); @@ -273,4 +273,11 @@ TEST_P(urEnqueueUSMFill2DNegativeTest, InvalidNullPtrEventWaitList) { pattern.data(), width, 1, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueUSMFill2D(queue, ptr, pitch, pattern_size, + pattern.data(), width, 1, 1, &inv_evt, + nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + ASSERT_SUCCESS(urEventRelease(validEvent)); } diff --git a/test/conformance/enqueue/urEnqueueUSMMemcpy.cpp b/test/conformance/enqueue/urEnqueueUSMMemcpy.cpp index e1af65896c..6cd16546e9 100644 --- a/test/conformance/enqueue/urEnqueueUSMMemcpy.cpp +++ b/test/conformance/enqueue/urEnqueueUSMMemcpy.cpp @@ -115,6 +115,7 @@ TEST_P(urEnqueueUSMMemcpyTest, NonBlocking) { allocation_size, 0, nullptr, &memcpy_event)); ASSERT_SUCCESS(urEventWait(1, &memcpy_event)); + ASSERT_SUCCESS(urEventRelease(memcpy_event)); ASSERT_NO_FATAL_FAILURE(verifyData()); } @@ -158,6 +159,11 @@ TEST_P(urEnqueueUSMMemcpyTest, InvalidNullPtrEventWaitList) { allocation_size, 0, &memset_event, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueUSMMemcpy(queue, true, device_dst, device_src, + allocation_size, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueUSMMemcpyTest); diff --git a/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp b/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp index 
e56d2a02c9..8eaed4b743 100644 --- a/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp +++ b/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp @@ -104,6 +104,7 @@ TEST_P(urEnqueueUSMMemcpy2DTestWithParam, SuccessNonBlocking) { ASSERT_SUCCESS(uur::GetEventInfo( memcpy_event, UR_EVENT_INFO_COMMAND_EXECUTION_STATUS, event_status)); ASSERT_EQ(event_status, UR_EVENT_STATUS_COMPLETE); + ASSERT_SUCCESS(urEventRelease(memcpy_event)); ASSERT_NO_FATAL_FAILURE(verifyMemcpySucceeded()); } @@ -169,7 +170,7 @@ TEST_P(urEnqueueUSMMemcpy2DNegativeTest, InvalidSize) { TEST_P(urEnqueueUSMMemcpy2DNegativeTest, InvalidEventWaitList) { // enqueue something to get an event ur_event_handle_t event = nullptr; - int fill_pattern = 14; + uint8_t fill_pattern = 14; ASSERT_SUCCESS(urEnqueueUSMFill2D(queue, pDst, pitch, sizeof(fill_pattern), &fill_pattern, width, height, 0, nullptr, &event)); @@ -183,5 +184,10 @@ TEST_P(urEnqueueUSMMemcpy2DNegativeTest, InvalidEventWaitList) { urEnqueueUSMMemcpy2D(queue, true, pDst, pitch, pSrc, pitch, width, height, 0, &event, nullptr)); + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueUSMMemcpy2D(queue, true, pDst, pitch, pSrc, pitch, + width, height, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + ASSERT_SUCCESS(urEventRelease(event)); } diff --git a/test/conformance/enqueue/urEnqueueUSMPrefetch.cpp b/test/conformance/enqueue/urEnqueueUSMPrefetch.cpp index 70b93a55b7..cd7b087876 100644 --- a/test/conformance/enqueue/urEnqueueUSMPrefetch.cpp +++ b/test/conformance/enqueue/urEnqueueUSMPrefetch.cpp @@ -122,4 +122,10 @@ TEST_P(urEnqueueUSMPrefetchTest, InvalidEventWaitList) { urEnqueueUSMPrefetch(queue, ptr, allocation_size, UR_USM_MIGRATION_FLAG_DEFAULT, 0, &validEvent, nullptr)); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueUSMPrefetch(queue, ptr, allocation_size, + UR_USM_MIGRATION_FLAG_DEFAULT, 1, + &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } diff --git 
a/test/conformance/enqueue/urEnqueueWriteHostPipe.cpp b/test/conformance/enqueue/urEnqueueWriteHostPipe.cpp index 20ef1da38b..86e3b99fe3 100644 --- a/test/conformance/enqueue/urEnqueueWriteHostPipe.cpp +++ b/test/conformance/enqueue/urEnqueueWriteHostPipe.cpp @@ -55,18 +55,6 @@ TEST_P(urEnqueueWriteHostPipeTest, InvalidNullPointerBuffer) { &phEventWaitList, phEvent)); } -TEST_P(urEnqueueWriteHostPipeTest, InvalidNullPointerEvent) { - uint32_t numEventsInWaitList = 0; - ur_event_handle_t phEventWaitList; - ur_event_handle_t *phEvent = nullptr; - - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urEnqueueWriteHostPipe(queue, program, pipe_symbol, - /*blocking*/ true, &buffer, size, - numEventsInWaitList, - &phEventWaitList, phEvent)); -} - TEST_P(urEnqueueWriteHostPipeTest, InvalidEventWaitList) { ur_event_handle_t phEventWaitList; ur_event_handle_t *phEvent = nullptr; @@ -88,4 +76,10 @@ TEST_P(urEnqueueWriteHostPipeTest, InvalidEventWaitList) { urEnqueueWriteHostPipe(queue, program, pipe_symbol, /*blocking*/ true, &buffer, size, 0, &validEvent, nullptr)); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueWriteHostPipe(queue, program, pipe_symbol, + /*blocking*/ true, &buffer, size, 1, + &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } diff --git a/test/conformance/event/event_adapter_cuda.match b/test/conformance/event/event_adapter_cuda.match index 19f3ddeba0..e40ea36db6 100644 --- a/test/conformance/event/event_adapter_cuda.match +++ b/test/conformance/event/event_adapter_cuda.match @@ -2,4 +2,3 @@ urEventSetCallbackTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ urEventSetCallbackTest.ValidateParameters/NVIDIA_CUDA_BACKEND___{{.*}}_ urEventSetCallbackTest.AllStates/NVIDIA_CUDA_BACKEND___{{.*}}_ urEventSetCallbackTest.EventAlreadyCompleted/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEventSetCallbackNegativeTest.InvalidNullHandle/NVIDIA_CUDA_BACKEND___{{.*}}_ diff --git a/test/conformance/event/event_adapter_hip.match 
b/test/conformance/event/event_adapter_hip.match index 21532dac49..8682cdf4a6 100644 --- a/test/conformance/event/event_adapter_hip.match +++ b/test/conformance/event/event_adapter_hip.match @@ -2,4 +2,3 @@ urEventSetCallbackTest.Success/AMD_HIP_BACKEND___{{.*}}_ urEventSetCallbackTest.ValidateParameters/AMD_HIP_BACKEND___{{.*}}_ urEventSetCallbackTest.AllStates/AMD_HIP_BACKEND___{{.*}}_ urEventSetCallbackTest.EventAlreadyCompleted/AMD_HIP_BACKEND___{{.*}}_ -urEventSetCallbackNegativeTest.InvalidNullHandle/AMD_HIP_BACKEND___{{.*}}_ diff --git a/test/conformance/event/event_adapter_opencl.match b/test/conformance/event/event_adapter_opencl.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/event/fixtures.h b/test/conformance/event/fixtures.h index c1d5f7a7da..ee16f2152d 100644 --- a/test/conformance/event/fixtures.h +++ b/test/conformance/event/fixtures.h @@ -65,6 +65,7 @@ struct urEventReferenceTest : uur::urProfilingQueueTest { input.assign(count, 42); ASSERT_SUCCESS(urEnqueueMemBufferWrite( queue, buffer, false, 0, size, input.data(), 0, nullptr, &event)); + ASSERT_SUCCESS(urEventWait(1, &event)); } void TearDown() override { diff --git a/test/conformance/event/urEventSetCallback.cpp b/test/conformance/event/urEventSetCallback.cpp index 18eb8e00db..ac6f988e71 100644 --- a/test/conformance/event/urEventSetCallback.cpp +++ b/test/conformance/event/urEventSetCallback.cpp @@ -14,8 +14,9 @@ using urEventSetCallbackTest = uur::event::urEventReferenceTest; TEST_P(urEventSetCallbackTest, Success) { struct Callback { - static void callback(ur_event_handle_t hEvent, - ur_execution_info_t execStatus, void *pUserData) { + static void callback([[maybe_unused]] ur_event_handle_t hEvent, + [[maybe_unused]] ur_execution_info_t execStatus, + void *pUserData) { auto status = reinterpret_cast(pUserData); *status = true; @@ -24,7 +25,7 @@ TEST_P(urEventSetCallbackTest, Success) { bool didRun = false; ASSERT_SUCCESS(urEventSetCallback( - event, 
ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE, + event, ur_execution_info_t::UR_EXECUTION_INFO_COMPLETE, Callback::callback, &didRun)); ASSERT_SUCCESS(urEventWait(1, &event)); @@ -55,13 +56,13 @@ TEST_P(urEventSetCallbackTest, ValidateParameters) { CallbackParameters parameters{}; ASSERT_SUCCESS(urEventSetCallback( - event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE, + event, ur_execution_info_t::UR_EXECUTION_INFO_COMPLETE, Callback::callback, ¶meters)); ASSERT_SUCCESS(urEventWait(1, &event)); ASSERT_SUCCESS(urEventRelease(event)); ASSERT_EQ(event, parameters.event); - ASSERT_EQ(ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE, + ASSERT_EQ(ur_execution_info_t::UR_EXECUTION_INFO_COMPLETE, parameters.execStatus); } @@ -71,34 +72,26 @@ TEST_P(urEventSetCallbackTest, ValidateParameters) { TEST_P(urEventSetCallbackTest, AllStates) { struct CallbackStatus { - bool queued = false; bool submitted = false; bool running = false; bool complete = false; }; struct Callback { - static void callback(ur_event_handle_t hEvent, + static void callback([[maybe_unused]] ur_event_handle_t hEvent, ur_execution_info_t execStatus, void *pUserData) { auto status = reinterpret_cast(pUserData); switch (execStatus) { - case ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED: { - status->queued = true; - break; - } - case ur_execution_info_t:: - UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED: { + case ur_execution_info_t::UR_EXECUTION_INFO_SUBMITTED: { status->submitted = true; break; } - case ur_execution_info_t:: - UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING: { + case ur_execution_info_t::UR_EXECUTION_INFO_RUNNING: { status->running = true; break; } - case ur_execution_info_t:: - UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE: { + case ur_execution_info_t::UR_EXECUTION_INFO_COMPLETE: { status->complete = true; break; } @@ -112,22 +105,18 @@ TEST_P(urEventSetCallbackTest, AllStates) { CallbackStatus status{}; ASSERT_SUCCESS(urEventSetCallback( - 
event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED, - Callback::callback, &status)); - ASSERT_SUCCESS(urEventSetCallback( - event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED, + event, ur_execution_info_t::UR_EXECUTION_INFO_SUBMITTED, Callback::callback, &status)); ASSERT_SUCCESS(urEventSetCallback( - event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING, + event, ur_execution_info_t::UR_EXECUTION_INFO_RUNNING, Callback::callback, &status)); ASSERT_SUCCESS(urEventSetCallback( - event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE, + event, ur_execution_info_t::UR_EXECUTION_INFO_COMPLETE, Callback::callback, &status)); ASSERT_SUCCESS(urEventWait(1, &event)); ASSERT_SUCCESS(urEventRelease(event)); - ASSERT_TRUE(status.queued); ASSERT_TRUE(status.submitted); ASSERT_TRUE(status.running); ASSERT_TRUE(status.complete); @@ -142,8 +131,9 @@ TEST_P(urEventSetCallbackTest, EventAlreadyCompleted) { ASSERT_SUCCESS(urEventWait(1, &event)); struct Callback { - static void callback(ur_event_handle_t hEvent, - ur_execution_info_t execStatus, void *pUserData) { + static void callback([[maybe_unused]] ur_event_handle_t hEvent, + [[maybe_unused]] ur_execution_info_t execStatus, + void *pUserData) { auto status = reinterpret_cast(pUserData); *status = true; @@ -153,7 +143,7 @@ TEST_P(urEventSetCallbackTest, EventAlreadyCompleted) { bool didRun = false; ASSERT_SUCCESS(urEventSetCallback( - event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE, + event, ur_execution_info_t::UR_EXECUTION_INFO_COMPLETE, Callback::callback, &didRun)); ASSERT_SUCCESS(urEventRelease(event)); @@ -165,23 +155,20 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEventSetCallbackTest); /* Negative tests */ using urEventSetCallbackNegativeTest = uur::event::urEventTest; -void emptyCallback(ur_event_handle_t hEvent, ur_execution_info_t execStatus, - void *pUserData) {} +void emptyCallback(ur_event_handle_t, ur_execution_info_t, void *) {} 
-TEST_P(urEventSetCallbackNegativeTest, InvalidNullHandle) { - - ASSERT_EQ_RESULT( - urEventSetCallback( - nullptr, - ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED, - emptyCallback, nullptr), - UR_RESULT_ERROR_INVALID_NULL_HANDLE); +TEST_P(urEventSetCallbackNegativeTest, InvalidNullHandleEvent) { + ASSERT_EQ_RESULT(urEventSetCallback( + nullptr, ur_execution_info_t::UR_EXECUTION_INFO_QUEUED, + emptyCallback, nullptr), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); +} +TEST_P(urEventSetCallbackNegativeTest, InvalidNullPointerCallback) { ASSERT_EQ_RESULT( - urEventSetCallback( - event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED, - nullptr, nullptr), - UR_RESULT_ERROR_INVALID_NULL_HANDLE); + urEventSetCallback(event, ur_execution_info_t::UR_EXECUTION_INFO_QUEUED, + nullptr, nullptr), + UR_RESULT_ERROR_INVALID_NULL_POINTER); } TEST_P(urEventSetCallbackNegativeTest, InvalidEnumeration) { diff --git a/test/conformance/kernel/kernel_adapter_level_zero.match b/test/conformance/kernel/kernel_adapter_level_zero.match index 7a1c0d5b8e..2668b6821a 100644 --- a/test/conformance/kernel/kernel_adapter_level_zero.match +++ b/test/conformance/kernel/kernel_adapter_level_zero.match @@ -1 +1,25 @@ -Segmentation fault +urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_NUM_REGS +urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_FUNCTION_NAME +urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_NUM_ARGS +urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_REFERENCE_COUNT +urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_CONTEXT +urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_PROGRAM 
+urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_ATTRIBUTES +urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_NUM_REGS +urKernelSetArgLocalTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgPointerTest.SuccessHost/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgPointerTest.SuccessDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgPointerTest.SuccessShared/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgValueTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgValueTest.InvalidKernelArgumentSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoTest.SuccessIndirectAccess/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoUSMPointersTest.SuccessHost/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoUSMPointersTest.SuccessDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoUSMPointersTest.SuccessShared/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetSpecializationConstantsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetSpecializationConstantsTest.InvalidNullHandleKernel/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
+urKernelSetSpecializationConstantsTest.InvalidSizeCount/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ diff --git a/test/conformance/kernel/kernel_adapter_opencl.match b/test/conformance/kernel/kernel_adapter_opencl.match new file mode 100644 index 0000000000..799225be19 --- /dev/null +++ b/test/conformance/kernel/kernel_adapter_opencl.match @@ -0,0 +1,5 @@ +urKernelSetArgValueTest.InvalidKernelArgumentSize/Intel_R__OpenCL___{{.*}}_ +urKernelSetSpecializationConstantsTest.Success/Intel_R__OpenCL___{{.*}}_ +urKernelSetSpecializationConstantsTest.InvalidNullHandleKernel/Intel_R__OpenCL___{{.*}}_ +urKernelSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/Intel_R__OpenCL___{{.*}}_ +urKernelSetSpecializationConstantsTest.InvalidSizeCount/Intel_R__OpenCL___{{.*}}_ diff --git a/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp b/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp index 7575fb309f..ab01069c82 100644 --- a/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp +++ b/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp @@ -25,7 +25,7 @@ struct urKernelCreateWithNativeHandleTest : uur::urKernelTest { ur_kernel_native_properties_t properties = { UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES, /*sType*/ nullptr, /*pNext*/ - true /*isNativeHandleOwned*/ + false /*isNativeHandleOwned*/ }; }; UUR_INSTANTIATE_KERNEL_TEST_SUITE_P(urKernelCreateWithNativeHandleTest); @@ -56,15 +56,8 @@ TEST_P(urKernelCreateWithNativeHandleTest, InvalidNullHandleProgram) { &properties, &native_kernel)); } -TEST_P(urKernelCreateWithNativeHandleTest, InvalidNullPointerProperties) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urKernelCreateWithNativeHandle(native_kernel_handle, - context, program, nullptr, - &native_kernel)); -} - TEST_P(urKernelCreateWithNativeHandleTest, InvalidNullPointerNativeKernel) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, 
urKernelCreateWithNativeHandle(native_kernel_handle, context, program, &properties, nullptr)); diff --git a/test/conformance/kernel/urKernelSetArgPointer.cpp b/test/conformance/kernel/urKernelSetArgPointer.cpp index 4cb4cb2c8f..11d26778c5 100644 --- a/test/conformance/kernel/urKernelSetArgPointer.cpp +++ b/test/conformance/kernel/urKernelSetArgPointer.cpp @@ -15,7 +15,7 @@ struct urKernelSetArgPointerTest : uur::urKernelExecutionTest { if (allocation) { ASSERT_SUCCESS(urUSMFree(context, allocation)); } - UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::TearDown()); + UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::TearDown()); } void ValidateAllocation(void *pointer) { @@ -129,9 +129,9 @@ struct urKernelSetArgPointerNegativeTest : urKernelSetArgPointerTest { } void SetUp() { + UUR_RETURN_ON_FATAL_FAILURE(urKernelSetArgPointerTest::SetUp()); SetUpAllocation(); ASSERT_NE(allocation, nullptr); - UUR_RETURN_ON_FATAL_FAILURE(urKernelSetArgPointerTest::SetUp()); } }; UUR_INSTANTIATE_KERNEL_TEST_SUITE_P(urKernelSetArgPointerNegativeTest); diff --git a/test/conformance/kernel/urKernelSetArgSampler.cpp b/test/conformance/kernel/urKernelSetArgSampler.cpp index 814b79a153..37cb3401f2 100644 --- a/test/conformance/kernel/urKernelSetArgSampler.cpp +++ b/test/conformance/kernel/urKernelSetArgSampler.cpp @@ -5,8 +5,10 @@ #include -struct urKernelSetArgSamplerTest : uur::urKernelTest { +struct urKernelSetArgSamplerTest : uur::urBaseKernelTest { void SetUp() { + program_name = "image_copy"; + UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelTest::SetUp()); // Images and samplers are not available on AMD ur_platform_backend_t backend; ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, @@ -14,9 +16,7 @@ struct urKernelSetArgSamplerTest : uur::urKernelTest { if (backend == UR_PLATFORM_BACKEND_HIP) { GTEST_SKIP() << "Sampler are not supported on hip."; } - - program_name = "image_copy"; - UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::SetUp()); + Build(); ur_sampler_desc_t sampler_desc 
= { UR_STRUCTURE_TYPE_SAMPLER_DESC, /* sType */ nullptr, /* pNext */ @@ -31,7 +31,7 @@ struct urKernelSetArgSamplerTest : uur::urKernelTest { if (sampler) { ASSERT_SUCCESS(urSamplerRelease(sampler)); } - UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::TearDown()); + UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelTest::TearDown()); } ur_sampler_handle_t sampler = nullptr; diff --git a/test/conformance/kernel/urKernelSetSpecializationConstants.cpp b/test/conformance/kernel/urKernelSetSpecializationConstants.cpp index 9b2bce7208..665a20de4a 100644 --- a/test/conformance/kernel/urKernelSetSpecializationConstants.cpp +++ b/test/conformance/kernel/urKernelSetSpecializationConstants.cpp @@ -5,10 +5,10 @@ #include -struct urKernelSetSpecializationConstantsTest : uur::urKernelExecutionTest { +struct urKernelSetSpecializationConstantsTest : uur::urBaseKernelExecutionTest { void SetUp() override { program_name = "spec_constant"; - UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); + UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelExecutionTest::SetUp()); bool supports_kernel_spec_constant = false; ASSERT_SUCCESS(urDeviceGetInfo( device, UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS, @@ -18,6 +18,7 @@ struct urKernelSetSpecializationConstantsTest : uur::urKernelExecutionTest { GTEST_SKIP() << "Device does not support setting kernel spec constants."; } + Build(); } uint32_t spec_value = 42; diff --git a/test/conformance/memory/memory_adapter_cuda.match b/test/conformance/memory/memory_adapter_cuda.match index 3b3da22161..5fe265ae8e 100644 --- a/test/conformance/memory/memory_adapter_cuda.match +++ b/test/conformance/memory/memory_adapter_cuda.match @@ -1,10 +1,6 @@ urMemBufferCreateWithNativeHandleTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urMemGetInfoTest.InvalidNullPointerParamValue/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_INFO_SIZE -urMemGetInfoTest.InvalidNullPointerParamValue/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_INFO_CONTEXT 
-urMemGetInfoTest.InvalidNullPointerPropSizeRet/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_INFO_SIZE -urMemGetInfoTest.InvalidNullPointerPropSizeRet/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_INFO_CONTEXT -urMemGetInfoImageTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_INFO_SIZE -urMemGetInfoImageTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_INFO_CONTEXT +{{OPT}}urMemGetInfoImageTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_INFO_SIZE +{{OPT}}urMemGetInfoImageTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_INFO_CONTEXT {{OPT}}urMemImageGetInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_IMAGE_INFO_FORMAT {{OPT}}urMemImageGetInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE {{OPT}}urMemImageGetInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_IMAGE_INFO_ROW_PITCH diff --git a/test/conformance/memory/memory_adapter_hip.match b/test/conformance/memory/memory_adapter_hip.match index c6d4bdacfe..02760dcb8a 100644 --- a/test/conformance/memory/memory_adapter_hip.match +++ b/test/conformance/memory/memory_adapter_hip.match @@ -1,9 +1,7 @@ -urMemBufferCreateWithNativeHandleTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urMemGetInfoTest.InvalidNullPointerParamValue/AMD_HIP_BACKEND___{{.*}}___UR_MEM_INFO_SIZE -urMemGetInfoTest.InvalidNullPointerParamValue/AMD_HIP_BACKEND___{{.*}}___UR_MEM_INFO_CONTEXT -urMemGetInfoTest.InvalidNullPointerPropSizeRet/AMD_HIP_BACKEND___{{.*}}___UR_MEM_INFO_SIZE -urMemGetInfoTest.InvalidNullPointerPropSizeRet/AMD_HIP_BACKEND___{{.*}}___UR_MEM_INFO_CONTEXT -{{OPT}}urMemImageCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}_ -{{OPT}}urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}Segmentation fault +{{OPT}}urMemGetInfoTest.InvalidNullPointerParamValue/AMD_HIP_BACKEND___{{.*}} +{{OPT}}urMemGetInfoTest.InvalidNullPointerParamValue/AMD_HIP_BACKEND___{{.*}} 
+{{OPT}}urMemGetInfoTest.InvalidNullPointerPropSizeRet/AMD_HIP_BACKEND___{{.*}} +{{OPT}}urMemGetInfoTest.InvalidNullPointerPropSizeRet/AMD_HIP_BACKEND___{{.*}} +{{OPT}}urMemImageCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}} +{{OPT}}urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}} +{{OPT}}urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}} diff --git a/test/conformance/memory/memory_adapter_level_zero.match b/test/conformance/memory/memory_adapter_level_zero.match index 00b085926f..ff51b24b4b 100644 --- a/test/conformance/memory/memory_adapter_level_zero.match +++ b/test/conformance/memory/memory_adapter_level_zero.match @@ -2,9 +2,5 @@ urMemBufferCreateTest.InvalidBufferSizeZero/Intel_R__oneAPI_Unified_Runtime_over urMemBufferPartitionTest.InvalidBufferSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urMemBufferPartitionTest.InvalidValueCreateType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE -urMemGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT -urMemGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE -urMemGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE {{Segmentation fault|Aborted}} diff --git a/test/conformance/memory/memory_adapter_opencl.match b/test/conformance/memory/memory_adapter_opencl.match new file mode 100644 index 0000000000..c01e55d804 --- /dev/null +++ b/test/conformance/memory/memory_adapter_opencl.match @@ -0,0 +1 @@ 
+urMemImageCreateTest.InvalidImageDescStype/Intel_R__OpenCL___{{.*}}_ diff --git a/test/conformance/memory/urMemGetInfo.cpp b/test/conformance/memory/urMemGetInfo.cpp index fcfcd429c5..71b9ccfcc2 100644 --- a/test/conformance/memory/urMemGetInfo.cpp +++ b/test/conformance/memory/urMemGetInfo.cpp @@ -65,13 +65,13 @@ TEST_P(urMemGetInfoTest, InvalidNullPointerParamValue) { size_t mem_size = 0; ASSERT_EQ_RESULT(urMemGetInfo(buffer, UR_MEM_INFO_SIZE, sizeof(mem_size), nullptr, nullptr), - UR_RESULT_ERROR_INVALID_SIZE); + UR_RESULT_ERROR_INVALID_NULL_POINTER); } TEST_P(urMemGetInfoTest, InvalidNullPointerPropSizeRet) { ASSERT_EQ_RESULT( urMemGetInfo(buffer, UR_MEM_INFO_SIZE, 0, nullptr, nullptr), - UR_RESULT_ERROR_INVALID_SIZE); + UR_RESULT_ERROR_INVALID_NULL_POINTER); } using urMemGetInfoImageTest = uur::urMemImageTestWithParam; diff --git a/test/conformance/platform/fixtures.h b/test/conformance/platform/fixtures.h index 5b532fb433..b294e7031a 100644 --- a/test/conformance/platform/fixtures.h +++ b/test/conformance/platform/fixtures.h @@ -17,7 +17,7 @@ struct urTest : ::testing::Test { ASSERT_SUCCESS(urLoaderConfigCreate(&loader_config)); ASSERT_SUCCESS(urLoaderConfigEnableLayer(loader_config, "UR_LAYER_FULL_VALIDATION")); - ASSERT_SUCCESS(urInit(device_flags, loader_config)); + ASSERT_SUCCESS(urLoaderInit(device_flags, loader_config)); uint32_t adapter_count; ASSERT_SUCCESS(urAdapterGet(0, nullptr, &adapter_count)); @@ -32,8 +32,7 @@ struct urTest : ::testing::Test { if (loader_config) { ASSERT_SUCCESS(urLoaderConfigRelease(loader_config)); } - ur_tear_down_params_t tear_down_params{}; - ASSERT_SUCCESS(urTearDown(&tear_down_params)); + ASSERT_SUCCESS(urLoaderTearDown()); } ur_loader_config_handle_t loader_config = nullptr; diff --git a/test/conformance/platform/platform_adapter_hip.match b/test/conformance/platform/platform_adapter_hip.match index df63fbef05..e69de29bb2 100644 --- a/test/conformance/platform/platform_adapter_hip.match +++ 
b/test/conformance/platform/platform_adapter_hip.match @@ -1 +0,0 @@ -urPlatformGetTest.InvalidNumEntries diff --git a/test/conformance/platform/platform_adapter_opencl.match b/test/conformance/platform/platform_adapter_opencl.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/program/program_adapter_opencl.match b/test/conformance/program/program_adapter_opencl.match new file mode 100644 index 0000000000..716bf27d9d --- /dev/null +++ b/test/conformance/program/program_adapter_opencl.match @@ -0,0 +1,10 @@ +urProgramCreateWithBinaryTest.Success/Intel_R__OpenCL___{{.*}}_ +urProgramCreateWithBinaryTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}_ +urProgramCreateWithBinaryTest.InvalidNullHandleDevice/Intel_R__OpenCL___{{.*}}_ +urProgramCreateWithBinaryTest.InvalidNullPointerBinary/Intel_R__OpenCL___{{.*}}_ +urProgramCreateWithBinaryTest.InvalidNullPointerProgram/Intel_R__OpenCL___{{.*}}_ +urProgramCreateWithBinaryTest.InvalidNullPointerMetadata/Intel_R__OpenCL___{{.*}}_ +urProgramCreateWithBinaryTest.InvalidSizePropertyCount/Intel_R__OpenCL___{{.*}}_ +urProgramGetFunctionPointerTest.InvalidFunctionName/Intel_R__OpenCL___{{.*}}_ +urProgramGetInfoTest.Success/Intel_R__OpenCL___{{.*}}___UR_PROGRAM_INFO_SOURCE +urProgramGetInfoTest.Success/Intel_R__OpenCL___{{.*}}___UR_PROGRAM_INFO_BINARIES diff --git a/test/conformance/program/urProgramCreateWithNativeHandle.cpp b/test/conformance/program/urProgramCreateWithNativeHandle.cpp index 7e0400d294..e121c61de8 100644 --- a/test/conformance/program/urProgramCreateWithNativeHandle.cpp +++ b/test/conformance/program/urProgramCreateWithNativeHandle.cpp @@ -41,12 +41,12 @@ TEST_P(urProgramCreateWithNativeHandleTest, Success) { TEST_P(urProgramCreateWithNativeHandleTest, InvalidNullHandleContext) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, urProgramCreateWithNativeHandle(native_program_handle, - context, nullptr, + nullptr, nullptr, &native_program)); } 
TEST_P(urProgramCreateWithNativeHandleTest, InvalidNullPointerProgram) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, urProgramCreateWithNativeHandle( native_program_handle, context, nullptr, nullptr)); } diff --git a/test/conformance/program/urProgramGetBuildInfo.cpp b/test/conformance/program/urProgramGetBuildInfo.cpp index 59eccd4a65..ddc0ff998c 100644 --- a/test/conformance/program/urProgramGetBuildInfo.cpp +++ b/test/conformance/program/urProgramGetBuildInfo.cpp @@ -36,17 +36,21 @@ TEST_P(urProgramGetBuildInfoTest, Success) { TEST_P(urProgramGetBuildInfoTest, InvalidNullHandleProgram) { ur_program_build_status_t programBuildStatus = UR_PROGRAM_BUILD_STATUS_ERROR; - ASSERT_SUCCESS(urProgramGetBuildInfo( - nullptr, device, UR_PROGRAM_BUILD_INFO_STATUS, - sizeof(programBuildStatus), &programBuildStatus, nullptr)); + ASSERT_EQ_RESULT(urProgramGetBuildInfo(nullptr, device, + UR_PROGRAM_BUILD_INFO_STATUS, + sizeof(programBuildStatus), + &programBuildStatus, nullptr), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); } TEST_P(urProgramGetBuildInfoTest, InvalidNullHandleDevice) { ur_program_build_status_t programBuildStatus = UR_PROGRAM_BUILD_STATUS_ERROR; - ASSERT_SUCCESS(urProgramGetBuildInfo( - program, nullptr, UR_PROGRAM_BUILD_INFO_STATUS, - sizeof(programBuildStatus), &programBuildStatus, nullptr)); + ASSERT_EQ_RESULT(urProgramGetBuildInfo(program, nullptr, + UR_PROGRAM_BUILD_INFO_STATUS, + sizeof(programBuildStatus), + &programBuildStatus, nullptr), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); } TEST_P(urProgramGetBuildInfoTest, InvalidEnumeration) { diff --git a/test/conformance/program/urProgramGetInfo.cpp b/test/conformance/program/urProgramGetInfo.cpp index 8e18dc7b87..80d00072e7 100644 --- a/test/conformance/program/urProgramGetInfo.cpp +++ b/test/conformance/program/urProgramGetInfo.cpp @@ -5,7 +5,14 @@ #include -using urProgramGetInfoTest = uur::urProgramTestWithParam; +struct urProgramGetInfoTest : 
uur::urProgramTestWithParam { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE( + urProgramTestWithParam::SetUp()); + // Some queries need the program to be built. + ASSERT_SUCCESS(urProgramBuild(this->context, program, nullptr)); + } +}; UUR_TEST_SUITE_P( urProgramGetInfoTest, @@ -29,8 +36,9 @@ TEST_P(urProgramGetInfoTest, Success) { TEST_P(urProgramGetInfoTest, InvalidNullHandleProgram) { uint32_t ref_count = 0; - ASSERT_SUCCESS(urProgramGetInfo(nullptr, UR_PROGRAM_INFO_REFERENCE_COUNT, - sizeof(ref_count), &ref_count, nullptr)); + ASSERT_EQ_RESULT(urProgramGetInfo(nullptr, UR_PROGRAM_INFO_REFERENCE_COUNT, + sizeof(ref_count), &ref_count, nullptr), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); } TEST_P(urProgramGetInfoTest, InvalidEnumeration) { diff --git a/test/conformance/queue/queue_adapter_cuda.match b/test/conformance/queue/queue_adapter_cuda.match index f7967fb388..e69de29bb2 100644 --- a/test/conformance/queue/queue_adapter_cuda.match +++ b/test/conformance/queue/queue_adapter_cuda.match @@ -1,4 +0,0 @@ -urQueueCreateTest.InvalidQueueProperties/NVIDIA_CUDA_BACKEND___{{.*}}_ -urQueueCreateWithNativeHandleTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urQueueGetInfoTestWithInfoParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_QUEUE_INFO_DEVICE_DEFAULT -urQueueGetInfoTestWithInfoParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_QUEUE_INFO_SIZE diff --git a/test/conformance/queue/queue_adapter_hip.match b/test/conformance/queue/queue_adapter_hip.match index 46a22304cd..d39b30aa73 100644 --- a/test/conformance/queue/queue_adapter_hip.match +++ b/test/conformance/queue/queue_adapter_hip.match @@ -1,3 +1,2 @@ -urQueueCreateTest.InvalidQueueProperties/AMD_HIP_BACKEND___{{.*}}_ urQueueGetInfoTestWithInfoParam.Success/AMD_HIP_BACKEND___{{.*}}___UR_QUEUE_INFO_DEVICE_DEFAULT urQueueGetInfoTestWithInfoParam.Success/AMD_HIP_BACKEND___{{.*}}___UR_QUEUE_INFO_SIZE diff --git a/test/conformance/queue/queue_adapter_level_zero.match 
b/test/conformance/queue/queue_adapter_level_zero.match index 9ceebd4233..c97aeab323 100644 --- a/test/conformance/queue/queue_adapter_level_zero.match +++ b/test/conformance/queue/queue_adapter_level_zero.match @@ -1,2 +1 @@ -urQueueCreateTest.InvalidQueueProperties/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ {{Segmentation fault|Aborted}} diff --git a/test/conformance/queue/queue_adapter_opencl.match b/test/conformance/queue/queue_adapter_opencl.match new file mode 100644 index 0000000000..a374e0b4b1 --- /dev/null +++ b/test/conformance/queue/queue_adapter_opencl.match @@ -0,0 +1,2 @@ +urQueueGetInfoTestWithInfoParam.Success/Intel_R__OpenCL___{{.*}}___UR_QUEUE_INFO_DEVICE_DEFAULT +urQueueGetInfoTestWithInfoParam.Success/Intel_R__OpenCL___{{.*}}___UR_QUEUE_INFO_SIZE diff --git a/test/conformance/queue/urQueueCreate.cpp b/test/conformance/queue/urQueueCreate.cpp index 90813b20a5..1c5ca6e614 100644 --- a/test/conformance/queue/urQueueCreate.cpp +++ b/test/conformance/queue/urQueueCreate.cpp @@ -75,7 +75,7 @@ TEST_P(urQueueCreateTest, InvalidQueueProperties) { // Initial value is just not a valid enum { ur_queue_handle_t queue = nullptr; - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES, + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_ENUMERATION, urQueueCreate(context, device, &props, &queue)); } // It should be an error to specify both low/high priorities diff --git a/test/conformance/queue/urQueueCreateWithNativeHandle.cpp b/test/conformance/queue/urQueueCreateWithNativeHandle.cpp index 01e7ca16d5..9f7588601f 100644 --- a/test/conformance/queue/urQueueCreateWithNativeHandle.cpp +++ b/test/conformance/queue/urQueueCreateWithNativeHandle.cpp @@ -23,9 +23,9 @@ TEST_P(urQueueCreateWithNativeHandleTest, Success) { &properties, &q)); ASSERT_NE(q, nullptr); - uint32_t q_size = 0; - ASSERT_SUCCESS(urQueueGetInfo(q, UR_QUEUE_INFO_SIZE, sizeof(uint32_t), - &q_size, nullptr)); - + ur_context_handle_t q_context = nullptr; + ASSERT_SUCCESS(urQueueGetInfo(q, 
UR_QUEUE_INFO_CONTEXT, sizeof(q_context), + &q_context, nullptr)); + ASSERT_EQ(q_context, context); ASSERT_SUCCESS(urQueueRelease(q)); } diff --git a/test/conformance/queue/urQueueFinish.cpp b/test/conformance/queue/urQueueFinish.cpp index fd557c21b2..069f8b5d67 100644 --- a/test/conformance/queue/urQueueFinish.cpp +++ b/test/conformance/queue/urQueueFinish.cpp @@ -25,7 +25,7 @@ TEST_P(urQueueFinishTest, Success) { ur_event_status_t exec_status; ASSERT_SUCCESS(urEventGetInfo(event, UR_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(exec_status), &exec_status, nullptr)); - ASSERT_EQ(exec_status, UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE); + ASSERT_EQ(exec_status, UR_EXECUTION_INFO_COMPLETE); } TEST_P(urQueueFinishTest, InvalidNullHandleQueue) { diff --git a/test/conformance/queue/urQueueGetInfo.cpp b/test/conformance/queue/urQueueGetInfo.cpp index 5f8100b612..9269e4de30 100644 --- a/test/conformance/queue/urQueueGetInfo.cpp +++ b/test/conformance/queue/urQueueGetInfo.cpp @@ -29,17 +29,22 @@ UUR_TEST_SUITE_P(urQueueGetInfoTestWithInfoParam, TEST_P(urQueueGetInfoTestWithInfoParam, Success) { ur_queue_info_t info_type = getParam(); size_t size = 0; - ASSERT_SUCCESS(urQueueGetInfo(queue, info_type, 0, nullptr, &size)); - ASSERT_NE(size, 0); + auto result = urQueueGetInfo(queue, info_type, 0, nullptr, &size); - if (const auto expected_size = queue_info_size_map.find(info_type); - expected_size != queue_info_size_map.end()) { - ASSERT_EQ(expected_size->second, size); - } + if (result == UR_RESULT_SUCCESS) { + ASSERT_NE(size, 0); + + if (const auto expected_size = queue_info_size_map.find(info_type); + expected_size != queue_info_size_map.end()) { + ASSERT_EQ(expected_size->second, size); + } - std::vector data(size); - ASSERT_SUCCESS( - urQueueGetInfo(queue, info_type, size, data.data(), nullptr)); + std::vector data(size); + ASSERT_SUCCESS( + urQueueGetInfo(queue, info_type, size, data.data(), nullptr)); + } else { + ASSERT_EQ_RESULT(result, 
UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION); + } } using urQueueGetInfoTest = uur::urQueueTest; diff --git a/test/conformance/runtime/urTearDown.cpp b/test/conformance/runtime/urTearDown.cpp deleted file mode 100644 index 3639515f82..0000000000 --- a/test/conformance/runtime/urTearDown.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (C) 2022-2023 Intel Corporation -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -// See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include - -struct urTearDownTest : testing::Test { - void SetUp() override { - ur_device_init_flags_t device_flags = 0; - ASSERT_SUCCESS(urInit(device_flags, nullptr)); - } -}; - -TEST_F(urTearDownTest, Success) { - ur_tear_down_params_t tear_down_params{}; - ASSERT_SUCCESS(urTearDown(&tear_down_params)); -} - -TEST_F(urTearDownTest, InvalidNullPointerParams) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, urTearDown(nullptr)); -} diff --git a/test/conformance/sampler/sampler_adapter_opencl.match b/test/conformance/sampler/sampler_adapter_opencl.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/source/environment.cpp b/test/conformance/source/environment.cpp index 875ceb63ef..6c917914ed 100644 --- a/test/conformance/source/environment.cpp +++ b/test/conformance/source/environment.cpp @@ -3,6 +3,7 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include #include #include @@ -12,6 +13,7 @@ #include "kernel_entry_points.h" #endif +#include #include #include @@ -40,6 +42,23 @@ std::ostream &operator<<(std::ostream &out, return out; } +std::ostream &operator<<(std::ostream &out, const ur_device_handle_t &device) { + size_t size; + urDeviceGetInfo(device, UR_DEVICE_INFO_NAME, 0, nullptr, &size); + std::vector name(size); + urDeviceGetInfo(device, UR_DEVICE_INFO_NAME, size, name.data(), nullptr); + out << name.data(); + return out; +} + +std::ostream 
&operator<<(std::ostream &out, + const std::vector &devices) { + for (auto device : devices) { + out << "\n * \"" << device << "\""; + } + return out; +} + uur::PlatformEnvironment::PlatformEnvironment(int argc, char **argv) : platform_options{parsePlatformOptions(argc, argv)} { instance = this; @@ -57,7 +76,7 @@ uur::PlatformEnvironment::PlatformEnvironment(int argc, char **argv) } ur_device_init_flags_t device_flags = 0; - auto initResult = urInit(device_flags, config); + auto initResult = urLoaderInit(device_flags, config); auto configReleaseResult = urLoaderConfigRelease(config); switch (initResult) { case UR_RESULT_SUCCESS: @@ -66,7 +85,7 @@ uur::PlatformEnvironment::PlatformEnvironment(int argc, char **argv) error = ERROR_NO_ADAPTER; return; default: - error = "urInit() failed"; + error = "urLoaderInit() failed"; return; } @@ -99,14 +118,16 @@ uur::PlatformEnvironment::PlatformEnvironment(int argc, char **argv) } if (platform_options.platform_name.empty()) { - if (platforms.size() == 1) { + + if (platforms.size() == 1 || platform_options.platforms_count == 1) { platform = platforms[0]; } else { std::stringstream ss_error; ss_error << "Select a single platform from below using the " "--platform=NAME " "command-line option:" - << platforms; + << platforms << std::endl + << "or set --platforms_count=1."; error = ss_error.str(); return; } @@ -135,7 +156,8 @@ uur::PlatformEnvironment::PlatformEnvironment(int argc, char **argv) << "\" not found. 
Select a single platform from below " "using the " "--platform=NAME command-line options:" - << platforms; + << platforms << std::endl + << "or set --platforms_count=1."; error = ss_error.str(); return; } @@ -159,9 +181,8 @@ void uur::PlatformEnvironment::TearDown() { for (auto adapter : adapters) { urAdapterRelease(adapter); } - ur_tear_down_params_t tear_down_params{}; - if (urTearDown(&tear_down_params)) { - FAIL() << "urTearDown() failed"; + if (urLoaderTearDown()) { + FAIL() << "urLoaderTearDown() failed"; } } @@ -177,6 +198,40 @@ PlatformEnvironment::parsePlatformOptions(int argc, char **argv) { arg, "--platform=", sizeof("--platform=") - 1) == 0) { options.platform_name = std::string(&arg[std::strlen("--platform=")]); + } else if (std::strncmp(arg, "--platforms_count=", + sizeof("--platforms_count=") - 1) == 0) { + options.platforms_count = std::strtoul( + &arg[std::strlen("--platforms_count=")], nullptr, 10); + } + } + + /* If a platform was not provided using the --platform command line option, + * check if environment variable is set to use as a fallback. 
*/ + if (options.platform_name.empty()) { + auto env_platform = ur_getenv("UR_CTS_ADAPTER_PLATFORM"); + if (env_platform.has_value()) { + options.platform_name = env_platform.value(); + } + } + + return options; +} + +DevicesEnvironment::DeviceOptions +DevicesEnvironment::parseDeviceOptions(int argc, char **argv) { + DeviceOptions options; + for (int argi = 1; argi < argc; ++argi) { + const char *arg = argv[argi]; + if (!(std::strcmp(arg, "-h") && std::strcmp(arg, "--help"))) { + // TODO - print help + break; + } else if (std::strncmp(arg, "--device=", sizeof("--device=") - 1) == + 0) { + options.device_name = std::string(&arg[std::strlen("--device=")]); + } else if (std::strncmp(arg, "--devices_count=", + sizeof("--devices_count=") - 1) == 0) { + options.devices_count = std::strtoul( + &arg[std::strlen("--devices_count=")], nullptr, 10); } } return options; @@ -185,7 +240,8 @@ PlatformEnvironment::parsePlatformOptions(int argc, char **argv) { DevicesEnvironment *DevicesEnvironment::instance = nullptr; DevicesEnvironment::DevicesEnvironment(int argc, char **argv) - : PlatformEnvironment(argc, argv) { + : PlatformEnvironment(argc, argv), + device_options(parseDeviceOptions(argc, argv)) { instance = this; if (!error.empty()) { return; @@ -199,11 +255,64 @@ DevicesEnvironment::DevicesEnvironment(int argc, char **argv) error = "Could not find any devices associated with the platform"; return; } - devices.resize(count); - if (urDeviceGet(platform, UR_DEVICE_TYPE_ALL, count, devices.data(), - nullptr)) { - error = "urDeviceGet() failed to get devices."; - return; + + // Get the argument (devices_count) to limit test devices count. + // In case, the devices_count is "0", the variable count will not be changed. + // The CTS will run on all devices. 
+ if (device_options.device_name.empty()) { + if (device_options.devices_count > + (std::numeric_limits::max)()) { + error = "Invalid devices_count argument"; + return; + } else if (device_options.devices_count > 0) { + count = (std::min)( + count, static_cast(device_options.devices_count)); + } + devices.resize(count); + if (urDeviceGet(platform, UR_DEVICE_TYPE_ALL, count, devices.data(), + nullptr)) { + error = "urDeviceGet() failed to get devices."; + return; + } + } else { + devices.resize(count); + if (urDeviceGet(platform, UR_DEVICE_TYPE_ALL, count, devices.data(), + nullptr)) { + error = "urDeviceGet() failed to get devices."; + return; + } + for (u_long i = 0; i < count; i++) { + size_t size; + if (urDeviceGetInfo(devices[i], UR_DEVICE_INFO_NAME, 0, nullptr, + &size)) { + error = "urDeviceGetInfo() failed"; + return; + } + std::vector device_name(size); + if (urDeviceGetInfo(devices[i], UR_DEVICE_INFO_NAME, size, + device_name.data(), nullptr)) { + error = "urDeviceGetInfo() failed"; + return; + } + if (device_options.device_name == device_name.data()) { + device = devices[i]; + devices.clear(); + devices.resize(1); + devices[0] = device; + break; + } + } + if (!device) { + std::stringstream ss_error; + ss_error << "Device \"" << device_options.device_name + << "\" not found. 
Select a single device from below " + "using the " + "--device=NAME command-line options:" + << devices << std::endl + << "or set --devices_count=COUNT."; + error = ss_error.str(); + return; + } } } @@ -368,8 +477,8 @@ void KernelsEnvironment::LoadSource( binary_out = binary_ptr; } -std::vector -KernelsEnvironment::GetEntryPointNames(std::string program_name) { +std::vector KernelsEnvironment::GetEntryPointNames( + [[maybe_unused]] std::string program_name) { std::vector entry_points; #ifdef KERNELS_ENVIRONMENT entry_points = uur::device_binaries::program_kernel_map[program_name]; diff --git a/test/conformance/testing/include/uur/checks.h b/test/conformance/testing/include/uur/checks.h index 649edfefe0..2ad3925842 100644 --- a/test/conformance/testing/include/uur/checks.h +++ b/test/conformance/testing/include/uur/checks.h @@ -8,7 +8,7 @@ #include #include -#include +#include #include namespace uur { diff --git a/test/conformance/testing/include/uur/environment.h b/test/conformance/testing/include/uur/environment.h index 5cc6756364..551be76e17 100644 --- a/test/conformance/testing/include/uur/environment.h +++ b/test/conformance/testing/include/uur/environment.h @@ -17,6 +17,7 @@ struct PlatformEnvironment : ::testing::Environment { struct PlatformOptions { std::string platform_name; + unsigned long platforms_count; }; PlatformEnvironment(int argc, char **argv); @@ -36,17 +37,26 @@ struct PlatformEnvironment : ::testing::Environment { struct DevicesEnvironment : PlatformEnvironment { + struct DeviceOptions { + std::string device_name; + unsigned long devices_count; + }; + DevicesEnvironment(int argc, char **argv); virtual ~DevicesEnvironment() override = default; virtual void SetUp() override; virtual void TearDown() override; + DeviceOptions parseDeviceOptions(int argc, char **argv); + inline const std::vector &GetDevices() const { return devices; } + DeviceOptions device_options; std::vector devices; + ur_device_handle_t device = nullptr; static 
DevicesEnvironment *instance; }; diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index 2c6cc1dde9..2ede84d135 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -601,6 +601,128 @@ struct urMemImageQueueTest : urQueueTest { 0}; // num samples }; +struct urMultiDeviceMemImageTest : urMultiDeviceContextTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceContextTest::SetUp()); + ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, + &format, &desc1D, nullptr, &image1D)); + + ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, + &format, &desc2D, nullptr, &image2D)); + + ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, + &format, &desc3D, nullptr, &image3D)); + } + + void TearDown() override { + if (image1D) { + EXPECT_SUCCESS(urMemRelease(image1D)); + } + if (image2D) { + EXPECT_SUCCESS(urMemRelease(image2D)); + } + if (image3D) { + EXPECT_SUCCESS(urMemRelease(image3D)); + } + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceContextTest::TearDown()); + } + + const size_t width = 1024; + const size_t height = 8; + const size_t depth = 2; + ur_mem_handle_t image1D = nullptr; + ur_mem_handle_t image2D = nullptr; + ur_mem_handle_t image3D = nullptr; + ur_rect_region_t region1D{width, 1, 1}; + ur_rect_region_t region2D{width, height, 1}; + ur_rect_region_t region3D{width, height, depth}; + ur_rect_offset_t origin{0, 0, 0}; + ur_image_format_t format = {UR_IMAGE_CHANNEL_ORDER_RGBA, + UR_IMAGE_CHANNEL_TYPE_FLOAT}; + ur_image_desc_t desc1D = {UR_STRUCTURE_TYPE_IMAGE_DESC, // stype + nullptr, // pNext + UR_MEM_TYPE_IMAGE1D, // mem object type + width, // image width + 1, // image height + 1, // image depth + 1, // array size + 0, // row pitch + 0, // slice pitch + 0, // mip levels + 0}; // num samples + + ur_image_desc_t desc2D = {UR_STRUCTURE_TYPE_IMAGE_DESC, // stype + nullptr, // pNext + 
UR_MEM_TYPE_IMAGE2D, // mem object type + width, // image width + height, // image height + 1, // image depth + 1, // array size + 0, // row pitch + 0, // slice pitch + 0, // mip levels + 0}; // num samples + + ur_image_desc_t desc3D = {UR_STRUCTURE_TYPE_IMAGE_DESC, // stype + nullptr, // pNext + UR_MEM_TYPE_IMAGE3D, // mem object type + width, // image width + height, // image height + depth, // image depth + 1, // array size + 0, // row pitch + 0, // slice pitch + 0, // mip levels + 0}; // num samples +}; + +struct urMultiDeviceMemImageQueueTest : urMultiDeviceMemImageTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceMemImageTest::SetUp()); + queues.reserve(DevicesEnvironment::instance->devices.size()); + for (const auto &device : DevicesEnvironment::instance->devices) { + ur_queue_handle_t queue = nullptr; + ASSERT_SUCCESS(urQueueCreate(context, device, 0, &queue)); + queues.push_back(queue); + } + } + + void TearDown() override { + for (const auto &queue : queues) { + EXPECT_SUCCESS(urQueueRelease(queue)); + } + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceMemImageTest::TearDown()); + } + + std::vector queues; +}; + +struct urMultiDeviceMemImageWriteTest : urMultiDeviceMemImageQueueTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceMemImageQueueTest::SetUp()); + + ASSERT_SUCCESS(urEnqueueMemImageWrite(queues[0], image1D, true, origin, + region1D, 0, 0, input1D.data(), 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueMemImageWrite(queues[0], image2D, true, origin, + region2D, 0, 0, input2D.data(), 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueMemImageWrite(queues[0], image3D, true, origin, + region3D, 0, 0, input3D.data(), 0, + nullptr, nullptr)); + } + + void TearDown() override { + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceMemImageQueueTest::TearDown()); + } + + std::vector input1D = std::vector(width * 4, 42); + std::vector input2D = + std::vector(width * height * 4, 42); + std::vector input3D = + 
std::vector(width * height * depth * 4, 42); +}; + struct urUSMDeviceAllocTest : urQueueTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::SetUp()); @@ -810,7 +932,9 @@ struct urVirtualMemMappedTest : urVirtualMemTest { } void TearDown() override { - EXPECT_SUCCESS(urVirtualMemUnmap(context, virtual_ptr, size)); + if (virtual_ptr) { + EXPECT_SUCCESS(urVirtualMemUnmap(context, virtual_ptr, size)); + } UUR_RETURN_ON_FATAL_FAILURE(urVirtualMemTest::TearDown()); } }; @@ -826,8 +950,10 @@ struct urVirtualMemMappedTestWithParam : urVirtualMemTestWithParam { } void TearDown() override { - EXPECT_SUCCESS( - urVirtualMemUnmap(this->context, this->virtual_ptr, this->size)); + if (this->virtual_ptr) { + EXPECT_SUCCESS(urVirtualMemUnmap(this->context, this->virtual_ptr, + this->size)); + } UUR_RETURN_ON_FATAL_FAILURE(urVirtualMemTestWithParam::TearDown()); } }; @@ -861,7 +987,9 @@ struct urUSMDeviceAllocTestWithParam : urQueueTestWithParam { } void TearDown() override { - ASSERT_SUCCESS(urUSMFree(this->context, ptr)); + if (ptr) { + ASSERT_SUCCESS(urUSMFree(this->context, ptr)); + } if (pool) { ASSERT_TRUE(use_pool); ASSERT_SUCCESS(urUSMPoolRelease(pool)); @@ -961,14 +1089,17 @@ template struct urProgramTestWithParam : urContextTestWithParam { ur_program_handle_t program = nullptr; }; -struct urKernelTest : urProgramTest { +struct urBaseKernelTest : urProgramTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urProgramTest::SetUp()); - ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); auto kernel_names = uur::KernelsEnvironment::instance->GetEntryPointNames(program_name); kernel_name = kernel_names[0]; ASSERT_FALSE(kernel_name.empty()); + } + + void Build() { + ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); ASSERT_SUCCESS(urKernelCreate(program, kernel_name.data(), &kernel)); } @@ -983,15 +1114,26 @@ struct urKernelTest : urProgramTest { ur_kernel_handle_t kernel = nullptr; }; -template struct urKernelTestWithParam : 
urProgramTestWithParam { +struct urKernelTest : urBaseKernelTest { + void SetUp() override { + urBaseKernelTest::SetUp(); + Build(); + } +}; + +template +struct urBaseKernelTestWithParam : urProgramTestWithParam { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urProgramTestWithParam::SetUp()); - ASSERT_SUCCESS(urProgramBuild(this->context, this->program, nullptr)); auto kernel_names = uur::KernelsEnvironment::instance->GetEntryPointNames( this->program_name); kernel_name = kernel_names[0]; ASSERT_FALSE(kernel_name.empty()); + } + + void Build() { + ASSERT_SUCCESS(urProgramBuild(this->context, this->program, nullptr)); ASSERT_SUCCESS( urKernelCreate(this->program, kernel_name.data(), &kernel)); } @@ -1007,16 +1149,23 @@ template struct urKernelTestWithParam : urProgramTestWithParam { ur_kernel_handle_t kernel = nullptr; }; -struct urKernelExecutionTest : urKernelTest { +template struct urKernelTestWithParam : urBaseKernelTestWithParam { void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::SetUp()); + UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelTestWithParam::SetUp()); + urBaseKernelTestWithParam::Build(); + } +}; + +struct urBaseKernelExecutionTest : urBaseKernelTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelTest::SetUp()); } void TearDown() override { for (auto &buffer : buffer_args) { ASSERT_SUCCESS(urMemRelease(buffer)); } - UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::TearDown()); + UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelTest::TearDown()); } // Adds a kernel arg representing a sycl buffer constructed with a 1D range. 
@@ -1105,6 +1254,13 @@ struct urKernelExecutionTest : urKernelTest { uint32_t current_arg_index = 0; }; +struct urKernelExecutionTest : urBaseKernelExecutionTest { + void SetUp() { + UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelExecutionTest::SetUp()); + Build(); + } +}; + template struct GlobalVar { std::string name; T value; diff --git a/test/conformance/usm/usm_adapter_cuda.match b/test/conformance/usm/usm_adapter_cuda.match index e2ba6b6f63..15b68f5c6c 100644 --- a/test/conformance/usm/usm_adapter_cuda.match +++ b/test/conformance/usm/usm_adapter_cuda.match @@ -1,45 +1,7 @@ -urUSMDeviceAllocTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.SuccessWithDescriptors/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullPtrResult/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled +{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMDeviceAllocTest.InvalidValueAlignPowerOfTwo/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMAllocInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_ALLOC_INFO_BASE_PTR -{{OPT}}urUSMAllocInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_ALLOC_INFO_SIZE -{{OPT}}urUSMAllocInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_ALLOC_INFO_POOL -{{OPT}}urUSMHostAllocTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMHostAllocTest.SuccessWithDescriptors/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMHostAllocTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled 
-{{OPT}}urUSMHostAllocTest.InvalidNullPtrMem/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMHostAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMHostAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMHostAllocTest.InvalidValueAlignPowerOfTwo/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMPoolCreateTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ {{OPT}}urUSMPoolCreateTest.SuccessWithFlag/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidNullPointerPoolDesc/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidNullPointerPool/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidEnumerationFlags/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTestWithInfoParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_POOL_INFO_CONTEXT -{{OPT}}urUSMPoolGetInfoTestWithInfoParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT -{{OPT}}urUSMPoolGetInfoTest.InvalidNullHandlePool/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidEnumerationProperty/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidSizeZero/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidSizeTooSmall/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidNullPointerPropValue/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidNullPointerPropSizeRet/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolDestroyTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolDestroyTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolRetainTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolRetainTest.InvalidNullHandlePool/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMSharedAllocTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled 
-{{OPT}}urUSMSharedAllocTest.SuccessWithDescriptors/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.SuccessWithMultipleAdvices/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.InvalidNullPtrMem/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMSharedAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMSharedAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMSharedAllocTest.InvalidValueAlignPowerOfTwo/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled diff --git a/test/conformance/usm/usm_adapter_level_zero.match b/test/conformance/usm/usm_adapter_level_zero.match index 9e275d805e..bf45b83ec2 100644 --- a/test/conformance/usm/usm_adapter_level_zero.match +++ b/test/conformance/usm/usm_adapter_level_zero.match @@ -1,36 +1,11 @@ -urUSMDeviceAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMFreeTest.SuccessDeviceAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMFreeTest.SuccessHostAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMFreeTest.SuccessSharedAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_TYPE -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_BASE_PTR -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_SIZE -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_DEVICE urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_POOL -urUSMGetMemAllocInfoTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMGetMemAllocInfoTest.InvalidNullPointerMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMGetMemAllocInfoTest.InvalidEnumeration/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMGetMemAllocInfoTest.InvalidValuePropSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMHostAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_POOL_INFO_CONTEXT urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT urUSMPoolGetInfoTest.InvalidSizeTooSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
urUSMPoolRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMSharedAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMSharedAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMSharedAllocTest.SuccessWithMultipleAdvices/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.SuccessWithMultipleAdvices/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled diff --git a/test/conformance/usm/usm_adapter_opencl.match b/test/conformance/usm/usm_adapter_opencl.match new file mode 100644 index 0000000000..b9aa3f3bdf --- /dev/null +++ b/test/conformance/usm/usm_adapter_opencl.match @@ -0,0 +1,42 @@ +urUSMDeviceAllocTest.Success/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolDisabled +urUSMDeviceAllocTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMDeviceAllocTest.InvalidNullHandleDevice/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMDeviceAllocTest.InvalidNullPtrResult/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMDeviceAllocTest.InvalidValueAlignPowerOfTwo/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMAllocInfoTest.Success/Intel_R__OpenCL___{{.*}}___UR_USM_ALLOC_INFO_POOL 
+urUSMHostAllocTest.Success/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolDisabled +urUSMHostAllocTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMHostAllocTest.InvalidNullPtrMem/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMHostAllocTest.InvalidUSMSize/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMHostAllocTest.InvalidValueAlignPowerOfTwo/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMPoolCreateTest.Success/Intel_R__OpenCL___{{.*}}_ +urUSMPoolCreateTest.SuccessWithFlag/Intel_R__OpenCL___{{.*}}_ +urUSMPoolCreateTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}_ +urUSMPoolCreateTest.InvalidNullPointerPoolDesc/Intel_R__OpenCL___{{.*}}_ +urUSMPoolCreateTest.InvalidNullPointerPool/Intel_R__OpenCL___{{.*}}_ +urUSMPoolCreateTest.InvalidEnumerationFlags/Intel_R__OpenCL___{{.*}}_ +urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__OpenCL___{{.*}}___UR_USM_POOL_INFO_CONTEXT +urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__OpenCL___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT +urUSMPoolGetInfoTest.InvalidNullHandlePool/Intel_R__OpenCL___{{.*}}_ +urUSMPoolGetInfoTest.InvalidEnumerationProperty/Intel_R__OpenCL___{{.*}}_ +urUSMPoolGetInfoTest.InvalidSizeZero/Intel_R__OpenCL___{{.*}}_ +urUSMPoolGetInfoTest.InvalidSizeTooSmall/Intel_R__OpenCL___{{.*}}_ +urUSMPoolGetInfoTest.InvalidNullPointerPropValue/Intel_R__OpenCL___{{.*}}_ +urUSMPoolGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__OpenCL___{{.*}}_ +urUSMPoolDestroyTest.Success/Intel_R__OpenCL___{{.*}}_ +urUSMPoolDestroyTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}_ +urUSMPoolRetainTest.Success/Intel_R__OpenCL___{{.*}}_ +urUSMPoolRetainTest.InvalidNullHandlePool/Intel_R__OpenCL___{{.*}}_ +urUSMSharedAllocTest.Success/Intel_R__OpenCL___{{.*}}___UsePoolEnabled 
+urUSMSharedAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMSharedAllocTest.SuccessWithMultipleAdvices/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMSharedAllocTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMSharedAllocTest.InvalidNullHandleDevice/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMSharedAllocTest.InvalidNullPtrMem/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMSharedAllocTest.InvalidUSMSize/Intel_R__OpenCL___{{.*}}___UsePoolEnabled +urUSMSharedAllocTest.InvalidValueAlignPowerOfTwo/Intel_R__OpenCL___{{.*}}___UsePoolEnabled diff --git a/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp b/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp index d4feccd6dc..c3331f1b5b 100644 --- a/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp +++ b/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp @@ -4,8 +4,20 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include -using urVirtualMemGranularityGetInfoTest = - uur::urContextTestWithParam; +struct urVirtualMemGranularityGetInfoTest + : uur::urContextTestWithParam { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE( + urContextTestWithParam::SetUp()); + ur_bool_t virtual_memory_support = false; + ASSERT_SUCCESS(urDeviceGetInfo( + this->device, UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT, + sizeof(ur_bool_t), &virtual_memory_support, nullptr)); + if (!virtual_memory_support) { + GTEST_SKIP() << "Virtual memory is not supported."; + } + } +}; UUR_TEST_SUITE_P( urVirtualMemGranularityGetInfoTest, @@ -42,7 +54,19 @@ TEST_P(urVirtualMemGranularityGetInfoTest, Success) { } } -using urVirtualMemGranularityGetInfoNegativeTest = uur::urContextTest; +struct urVirtualMemGranularityGetInfoNegativeTest : uur::urContextTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urContextTest::SetUp()); + + ur_bool_t virtual_memory_support = false; + ASSERT_SUCCESS(urDeviceGetInfo( + 
device, UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT, sizeof(ur_bool_t), + &virtual_memory_support, nullptr)); + if (!virtual_memory_support) { + GTEST_SKIP() << "Virtual memory is not supported."; + } + } +}; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urVirtualMemGranularityGetInfoNegativeTest); TEST_P(urVirtualMemGranularityGetInfoNegativeTest, InvalidNullHandleContext) { diff --git a/test/conformance/virtual_memory/virtual_memory_adapter_opencl.match b/test/conformance/virtual_memory/virtual_memory_adapter_opencl.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/fuzz/CMakeLists.txt b/test/fuzz/CMakeLists.txt index 97ad1c91c9..897075c53d 100644 --- a/test/fuzz/CMakeLists.txt +++ b/test/fuzz/CMakeLists.txt @@ -18,7 +18,7 @@ function(add_fuzz_test name label) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) set(ENV_VARS UR_ENABLE_LAYERS=UR_LAYER_FULL_VALIDATION) - if(UR_BUILD_ADAPTER_L0) + if(UR_BUILD_ADAPTER_L0 OR UR_BUILD_ADAPTER_ALL) list(APPEND ENV_VARS UR_ADAPTERS_FORCE_LOAD=\"$\" NEOReadDebugKeys=1 diff --git a/test/fuzz/README.md b/test/fuzz/README.md index 22213b4dbf..9acc3f57ac 100644 --- a/test/fuzz/README.md +++ b/test/fuzz/README.md @@ -1,6 +1,6 @@ # Corpora for fuzz tests with fixed API calls scenarios Corpora in 'corpus' directory contain UR API calls in a predefined order described below. -All such scenarios begin with single calls to urInit() and urAdapterGet(). +All such scenarios begin with single calls to urLoaderInit() and urAdapterGet(). Corpus files are binary files containing ASCII characters which are interpreted by the test backwards, meaning that bytes are read from the end of the file to the beginning of the file. 
diff --git a/test/fuzz/urFuzz.cpp b/test/fuzz/urFuzz.cpp index cab9a44dd2..0d24f47516 100644 --- a/test/fuzz/urFuzz.cpp +++ b/test/fuzz/urFuzz.cpp @@ -419,7 +419,7 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t *data, size_t size) { } LoaderConfig config; - ur_result_t res = urInit(0, config.handle); + ur_result_t res = urLoaderInit(0, config.handle); if (res != UR_RESULT_SUCCESS) { return -1; } @@ -438,6 +438,11 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t *data, size_t size) { } } + res = urLoaderTearDown(); + if (res != UR_RESULT_SUCCESS) { + return -1; + } + return 0; } } // namespace fuzz diff --git a/test/layers/tracing/CMakeLists.txt b/test/layers/tracing/CMakeLists.txt index db4b9da590..2ccb4f69b0 100644 --- a/test/layers/tracing/CMakeLists.txt +++ b/test/layers/tracing/CMakeLists.txt @@ -3,9 +3,36 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -set(TEST_NAME example-collected-hello-world) +add_ur_library(test_collector SHARED + ${CMAKE_CURRENT_SOURCE_DIR}/test_collector.cpp +) + +target_include_directories(test_collector PRIVATE + ${CMAKE_SOURCE_DIR}/include +) + +target_link_libraries(test_collector PRIVATE ${TARGET_XPTI}) +target_include_directories(test_collector PRIVATE ${xpti_SOURCE_DIR}/include) + +if(MSVC) + target_compile_definitions(test_collector PRIVATE + XPTI_STATIC_LIBRARY XPTI_CALLBACK_API_EXPORTS) +endif() -add_test(NAME ${TEST_NAME} +function(set_tracing_test_props target_name collector_name) + set_tests_properties(${target_name} PROPERTIES + LABELS "tracing" + ) + + set_property(TEST ${target_name} PROPERTY ENVIRONMENT + "XPTI_TRACE_ENABLE=1" + "XPTI_FRAMEWORK_DISPATCHER=$" + "XPTI_SUBSCRIBERS=$" + "UR_ADAPTERS_FORCE_LOAD=\"$\"" + "UR_ENABLE_LAYERS=UR_LAYER_TRACING") +endfunction() + +add_test(NAME example-collected-hello-world COMMAND ${CMAKE_COMMAND} -D MODE=stdout -D TEST_FILE=$ @@ -14,13 +41,28 @@ add_test(NAME ${TEST_NAME} DEPENDS collector hello_world ) -set_tests_properties(${TEST_NAME} 
PROPERTIES - LABELS "tracing" -) +set_tracing_test_props(example-collected-hello-world collector) + +function(add_tracing_test name) + set(TEST_TARGET_NAME tracing-test-${name}) + add_ur_executable(${TEST_TARGET_NAME} + ${ARGN}) + target_link_libraries(${TEST_TARGET_NAME} + PRIVATE + ${PROJECT_NAME}::loader + ${PROJECT_NAME}::headers + ${PROJECT_NAME}::testing + GTest::gtest_main) + add_test(NAME ${name} + COMMAND ${CMAKE_COMMAND} + -D MODE=stderr + -D TEST_FILE=$ + -D MATCH_FILE=${CMAKE_CURRENT_SOURCE_DIR}/${name}.out.match + -P ${PROJECT_SOURCE_DIR}/cmake/match.cmake + DEPENDS test_collector + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tracing_test_props(${name} test_collector) +endfunction() -set_property(TEST ${TEST_NAME} PROPERTY ENVIRONMENT - "XPTI_TRACE_ENABLE=1" - "XPTI_FRAMEWORK_DISPATCHER=$" - "XPTI_SUBSCRIBERS=$" - "UR_ADAPTERS_FORCE_LOAD=\"$\"" - "UR_ENABLE_LAYERS=UR_LAYER_TRACING") +add_tracing_test(codeloc codeloc.cpp) diff --git a/test/layers/tracing/codeloc.cpp b/test/layers/tracing/codeloc.cpp new file mode 100644 index 0000000000..e0f1f91df1 --- /dev/null +++ b/test/layers/tracing/codeloc.cpp @@ -0,0 +1,53 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file codeloc.cpp + * + */ + +#include +#include + +struct ur_code_location_t test_callback(void *userdata) { + (void)userdata; + + ur_code_location_t codeloc; + codeloc.columnNumber = 1; + codeloc.lineNumber = 2; + codeloc.functionName = "fname"; + codeloc.sourceFile = "sfile"; + + return codeloc; +} + +TEST(LoaderCodeloc, NullCallback) { + ur_loader_config_handle_t loader_config; + ASSERT_EQ(urLoaderConfigCreate(&loader_config), UR_RESULT_SUCCESS); + ASSERT_EQ( + urLoaderConfigSetCodeLocationCallback(loader_config, nullptr, nullptr), + UR_RESULT_ERROR_INVALID_NULL_POINTER); + urLoaderConfigRelease(loader_config); +} + +TEST(LoaderCodeloc, NullHandle) { + ASSERT_EQ( + urLoaderConfigSetCodeLocationCallback(nullptr, test_callback, nullptr), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); +} + +TEST(LoaderCodeloc, Success) { + ur_loader_config_handle_t loader_config; + ASSERT_EQ(urLoaderConfigCreate(&loader_config), UR_RESULT_SUCCESS); + ASSERT_EQ(urLoaderConfigSetCodeLocationCallback(loader_config, + test_callback, nullptr), + UR_RESULT_SUCCESS); + urLoaderInit(0, loader_config); + uint32_t nadapters; + urAdapterGet(0, nullptr, &nadapters); + urLoaderConfigRelease(loader_config); +} diff --git a/test/layers/tracing/codeloc.out.match b/test/layers/tracing/codeloc.out.match new file mode 100644 index 0000000000..dc0c2e1335 --- /dev/null +++ b/test/layers/tracing/codeloc.out.match @@ -0,0 +1,2 @@ +begin urAdapterGet 178 fname sfile 2 1 +end urAdapterGet 178 fname sfile 2 1 diff --git a/test/layers/tracing/hello_world.out.match b/test/layers/tracing/hello_world.out.match index 7658650d04..cef17b8fdf 100644 --- a/test/layers/tracing/hello_world.out.match +++ b/test/layers/tracing/hello_world.out.match @@ -1,27 +1,23 @@ -function_with_args_begin(1) - urInit(.device_flags = 0); -function_with_args_end(1) - urInit(...) -> ur_result_t(0); Platform initialized. 
-function_with_args_begin(2) - urAdapterGet(unimplemented); +function_with_args_begin(1) - urAdapterGet(.NumEntries = 0, .phAdapters = {{.*}}, .pNumAdapters = {{.*}}); +function_with_args_end(1) - urAdapterGet(...) -> ur_result_t(0); +function_with_args_begin(2) - urAdapterGet(.NumEntries = 1, .phAdapters = {{.*}}, .pNumAdapters = {{.*}}); function_with_args_end(2) - urAdapterGet(...) -> ur_result_t(0); -function_with_args_begin(3) - urAdapterGet(unimplemented); -function_with_args_end(3) - urAdapterGet(...) -> ur_result_t(0); +function_with_args_begin(3) - urPlatformGet(unimplemented); +function_with_args_end(3) - urPlatformGet(...) -> ur_result_t(0); function_with_args_begin(4) - urPlatformGet(unimplemented); function_with_args_end(4) - urPlatformGet(...) -> ur_result_t(0); -function_with_args_begin(5) - urPlatformGet(unimplemented); -function_with_args_end(5) - urPlatformGet(...) -> ur_result_t(0); -function_with_args_begin(6) - urPlatformGetApiVersion(unimplemented); -function_with_args_end(6) - urPlatformGetApiVersion(...) -> ur_result_t(0); +function_with_args_begin(5) - urPlatformGetApiVersion(unimplemented); +function_with_args_end(5) - urPlatformGetApiVersion(...) -> ur_result_t(0); API version: {{0\.[0-9]+}} +function_with_args_begin(6) - urDeviceGet(unimplemented); +function_with_args_end(6) - urDeviceGet(...) -> ur_result_t(0); function_with_args_begin(7) - urDeviceGet(unimplemented); function_with_args_end(7) - urDeviceGet(...) -> ur_result_t(0); -function_with_args_begin(8) - urDeviceGet(unimplemented); -function_with_args_end(8) - urDeviceGet(...) -> ur_result_t(0); +function_with_args_begin(8) - urDeviceGetInfo(unimplemented); +function_with_args_end(8) - urDeviceGetInfo(...) -> ur_result_t(0); function_with_args_begin(9) - urDeviceGetInfo(unimplemented); function_with_args_end(9) - urDeviceGetInfo(...) -> ur_result_t(0); -function_with_args_begin(10) - urDeviceGetInfo(unimplemented); -function_with_args_end(10) - urDeviceGetInfo(...) 
-> ur_result_t(0); Found a Null Device gpu. -function_with_args_begin(11) - urAdapterRelease(unimplemented); -function_with_args_end(11) - urAdapterRelease(...) -> ur_result_t(0); -function_with_args_begin(12) - urTearDown(unimplemented); -function_with_args_end(12) - urTearDown(...) -> ur_result_t(0); +function_with_args_begin(10) - urAdapterRelease(unimplemented); +function_with_args_end(10) - urAdapterRelease(...) -> ur_result_t(0); diff --git a/test/layers/tracing/test_collector.cpp b/test/layers/tracing/test_collector.cpp new file mode 100644 index 0000000000..6c942c63ec --- /dev/null +++ b/test/layers/tracing/test_collector.cpp @@ -0,0 +1,74 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file test_collector.cpp + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ur_api.h" +#include "xpti/xpti_trace_framework.h" + +constexpr uint16_t TRACE_FN_BEGIN = + static_cast(xpti::trace_point_type_t::function_with_args_begin); +constexpr uint16_t TRACE_FN_END = + static_cast(xpti::trace_point_type_t::function_with_args_end); +constexpr std::string_view UR_STREAM_NAME = "ur"; + +XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, xpti::trace_event_data_t *, + xpti::trace_event_data_t *child, uint64_t, + const void *user_data) { + auto *args = static_cast(user_data); + auto *payload = xptiQueryPayload(child); + std::cerr << (trace_type == TRACE_FN_BEGIN ? 
"begin" : "end"); + std::cerr << " " << args->function_name << " " << args->function_id; + if (payload) { + std::cerr << " " << payload->name << " " << payload->source_file << " " + << payload->line_no << " " << payload->column_no; + } + std::cerr << std::endl; +} + +XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version, + unsigned int minor_version, const char *, + const char *stream_name) { + if (stream_name == nullptr) { + std::cout << "Stream name not provided. Aborting." << std::endl; + return; + } + if (std::string_view(stream_name) != UR_STREAM_NAME) { + std::cout << "Invalid stream name: " << stream_name << ". Expected " + << UR_STREAM_NAME << ". Aborting." << std::endl; + return; + } + + if (UR_MAKE_VERSION(major_version, minor_version) != + UR_API_VERSION_CURRENT) { + std::cout << "Invalid stream version: " << major_version << "." + << minor_version << ". Expected " + << UR_MAJOR_VERSION(UR_API_VERSION_CURRENT) << "." + << UR_MINOR_VERSION(UR_API_VERSION_CURRENT) << ". Aborting." 
+ << std::endl; + return; + } + + uint8_t stream_id = xptiRegisterStream(stream_name); + + xptiRegisterCallback(stream_id, TRACE_FN_BEGIN, trace_cb); + xptiRegisterCallback(stream_id, TRACE_FN_END, trace_cb); +} + +XPTI_CALLBACK_API void xptiTraceFinish(const char *) { /* noop */ +} diff --git a/test/layers/validation/fixtures.hpp b/test/layers/validation/fixtures.hpp index a41e48b3a4..ab92ba1e01 100644 --- a/test/layers/validation/fixtures.hpp +++ b/test/layers/validation/fixtures.hpp @@ -17,15 +17,14 @@ struct urTest : ::testing::Test { "UR_LAYER_FULL_VALIDATION"), UR_RESULT_SUCCESS); ur_device_init_flags_t device_flags = 0; - ASSERT_EQ(urInit(device_flags, loader_config), UR_RESULT_SUCCESS); + ASSERT_EQ(urLoaderInit(device_flags, loader_config), UR_RESULT_SUCCESS); } void TearDown() override { if (loader_config) { ASSERT_EQ(urLoaderConfigRelease(loader_config), UR_RESULT_SUCCESS); } - ur_tear_down_params_t tear_down_params{}; - ASSERT_EQ(urTearDown(&tear_down_params), UR_RESULT_SUCCESS); + ASSERT_EQ(urLoaderTearDown(), UR_RESULT_SUCCESS); } ur_loader_config_handle_t loader_config = nullptr; @@ -53,7 +52,12 @@ struct valPlatformsTest : urTest { UR_RESULT_SUCCESS); } - void TearDown() override { urTest::TearDown(); } + void TearDown() override { + for (auto &adapter : adapters) { + ASSERT_EQ(urAdapterRelease(adapter), UR_RESULT_SUCCESS); + } + urTest::TearDown(); + } std::vector adapters; std::vector platforms; diff --git a/test/layers/validation/leaks.cpp b/test/layers/validation/leaks.cpp index b0df81207e..e32aeafc89 100644 --- a/test/layers/validation/leaks.cpp +++ b/test/layers/validation/leaks.cpp @@ -5,6 +5,12 @@ #include "fixtures.hpp" +TEST_F(urTest, testUrAdapterGetLeak) { + ur_adapter_handle_t adapter = nullptr; + ASSERT_EQ(urAdapterGet(1, &adapter, nullptr), UR_RESULT_SUCCESS); + ASSERT_NE(nullptr, adapter); +} + TEST_F(valDeviceTest, testUrContextCreateLeak) { ur_context_handle_t context = nullptr; ASSERT_EQ(urContextCreate(1, &device, nullptr, 
&context), diff --git a/test/layers/validation/leaks.out.match b/test/layers/validation/leaks.out.match index aadba2252c..9fac722527 100644 --- a/test/layers/validation/leaks.out.match +++ b/test/layers/validation/leaks.out.match @@ -3,28 +3,45 @@ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 +\[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ +\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: +(.*) +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained 2 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) \[ERROR\]: Attempting to retain nonexistent handle [0-9xa-fA-F]+ (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Attempting 
to release nonexistent handle [0-9xa-fA-F]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) diff --git a/test/layers/validation/leaks_mt.out.match b/test/layers/validation/leaks_mt.out.match index 7d5a0bedd8..86de1e1d76 100644 --- a/test/layers/validation/leaks_mt.out.match +++ b/test/layers/validation/leaks_mt.out.match @@ -1,10 +1,13 @@ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 3 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained 3 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 3 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 4 @@ -13,17 +16,21 @@ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 7 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 8 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 9 +\[DEBUG\]: Reference count for handle 
[0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained 9 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 @@ -39,18 +46,22 @@ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -6 \[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -7 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained -7 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was 
recorded for first time here: (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ @@ -67,6 +78,7 @@ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) diff --git a/test/layers/validation/parameters.cpp b/test/layers/validation/parameters.cpp index ee679363dc..c02afd63d6 100644 --- a/test/layers/validation/parameters.cpp +++ b/test/layers/validation/parameters.cpp @@ -5,18 +5,6 @@ #include "fixtures.hpp" -TEST(valTest, urInit) { - ur_loader_config_handle_t config; - urLoaderConfigCreate(&config); - urLoaderConfigEnableLayer(config, "UR_PARAMETER_VALIDATION_LAYER"); - - const ur_device_init_flags_t device_flags = - UR_DEVICE_INIT_FLAG_FORCE_UINT32; - ASSERT_EQ(urInit(device_flags, config), - UR_RESULT_ERROR_INVALID_ENUMERATION); - ASSERT_EQ(urLoaderConfigRelease(config), UR_RESULT_SUCCESS); -} - TEST_F(valPlatformsTest, testUrPlatformGetApiVersion) { ur_api_version_t api_version = {}; diff --git a/test/loader/CMakeLists.txt b/test/loader/CMakeLists.txt index 0dbf999c45..d36f922098 100644 --- a/test/loader/CMakeLists.txt +++ b/test/loader/CMakeLists.txt @@ -10,4 +10,5 @@ set_tests_properties(example-hello-world PROPERTIES LABELS "loader" add_subdirectory(adapter_registry) add_subdirectory(loader_config) +add_subdirectory(loader_lifetime) add_subdirectory(platforms) diff --git a/test/loader/adapter_registry/search_order.cpp 
b/test/loader/adapter_registry/search_order.cpp index 9264bafe44..dbb6115467 100644 --- a/test/loader/adapter_registry/search_order.cpp +++ b/test/loader/adapter_registry/search_order.cpp @@ -6,7 +6,7 @@ #include "fixtures.hpp" template -void assertRegistryPathSequence(std::vector testAdapterPaths, +void assertRegistryPathSequence(const std::vector &testAdapterPaths, P predicate) { static size_t assertIndex = 0; @@ -24,7 +24,7 @@ TEST_F(adapterRegSearchTest, testSearchOrder) { auto it = std::find_if(registry.cbegin(), registry.cend(), hasTestLibName); ASSERT_NE(it, registry.end()); - auto testAdapterPaths = *it; + const auto &testAdapterPaths = *it; assertRegistryPathSequence(testAdapterPaths, isTestEnvPath); #ifndef _WIN32 assertRegistryPathSequence(testAdapterPaths, isTestLibName); diff --git a/test/loader/loader_config/CMakeLists.txt b/test/loader/loader_config/CMakeLists.txt index b2c2ffc4ec..db07bec990 100644 --- a/test/loader/loader_config/CMakeLists.txt +++ b/test/loader/loader_config/CMakeLists.txt @@ -3,7 +3,7 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -add_executable(test-loader-config +add_ur_executable(test-loader-config urLoaderConfigCreate.cpp urLoaderConfigGetInfo.cpp urLoaderConfigEnableLayer.cpp diff --git a/test/loader/loader_lifetime/CMakeLists.txt b/test/loader/loader_lifetime/CMakeLists.txt new file mode 100644 index 0000000000..c76ff87d0b --- /dev/null +++ b/test/loader/loader_lifetime/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright (C) 2023 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +add_executable(test-loader-lifetime + urLoaderInit.cpp + urLoaderTearDown.cpp +) + +target_link_libraries(test-loader-lifetime + PRIVATE + ${PROJECT_NAME}::common + ${PROJECT_NAME}::headers + ${PROJECT_NAME}::loader + gmock + GTest::gtest_main +) + +add_test(NAME loader-lifetime + COMMAND test-loader-lifetime + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} +) diff --git a/test/loader/loader_lifetime/fixtures.hpp b/test/loader/loader_lifetime/fixtures.hpp new file mode 100644 index 0000000000..b1eb3766c5 --- /dev/null +++ b/test/loader/loader_lifetime/fixtures.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef UR_LOADER_CONFIG_TEST_FIXTURES_H +#define UR_LOADER_CONFIG_TEST_FIXTURES_H + +#include "ur_api.h" +#include +#include + +#ifndef ASSERT_SUCCESS +#define ASSERT_SUCCESS(ACTUAL) ASSERT_EQ(UR_RESULT_SUCCESS, ACTUAL) +#endif + +/// @brief Make a string a valid identifier for gtest. +/// @param str The string to sanitize. +inline std::string GTestSanitizeString(const std::string &str) { + auto str_cpy = str; + std::replace_if( + str_cpy.begin(), str_cpy.end(), [](char c) { return !std::isalnum(c); }, + '_'); + return str_cpy; +} + +#endif diff --git a/test/conformance/runtime/urInit.cpp b/test/loader/loader_lifetime/urLoaderInit.cpp similarity index 62% rename from test/conformance/runtime/urInit.cpp rename to test/loader/loader_lifetime/urLoaderInit.cpp index 1de30ff471..dc1fdfa8ce 100644 --- a/test/conformance/runtime/urInit.cpp +++ b/test/loader/loader_lifetime/urLoaderInit.cpp @@ -2,11 +2,13 @@ // Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
// See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include +#include "fixtures.hpp" +#include -using urInitTestWithParam = ::testing::TestWithParam; +using urLoaderInitTestWithParam = + ::testing::TestWithParam; INSTANTIATE_TEST_SUITE_P( - , urInitTestWithParam, + , urLoaderInitTestWithParam, ::testing::Values(UR_DEVICE_INIT_FLAG_GPU, UR_DEVICE_INIT_FLAG_CPU, UR_DEVICE_INIT_FLAG_FPGA, UR_DEVICE_INIT_FLAG_MCA, UR_DEVICE_INIT_FLAG_VPU, @@ -15,25 +17,24 @@ INSTANTIATE_TEST_SUITE_P( UR_DEVICE_INIT_FLAG_FPGA | UR_DEVICE_INIT_FLAG_VPU), [](const ::testing::TestParamInfo &info) { std::stringstream ss; - ur_params::serializeFlag(ss, info.param); - return uur::GTestSanitizeString(ss.str()); + ur::details::printFlag(ss, info.param); + return GTestSanitizeString(ss.str()); }); -TEST_P(urInitTestWithParam, Success) { +TEST_P(urLoaderInitTestWithParam, Success) { ur_loader_config_handle_t config = nullptr; urLoaderConfigCreate(&config); urLoaderConfigEnableLayer(config, "UR_LAYER_FULL_VALIDATION"); ur_device_init_flags_t device_flags = GetParam(); - ASSERT_SUCCESS(urInit(device_flags, config)); + ASSERT_SUCCESS(urLoaderInit(device_flags, config)); - ur_tear_down_params_t tear_down_params{nullptr}; - ASSERT_SUCCESS(urTearDown(&tear_down_params)); + ASSERT_SUCCESS(urLoaderTearDown()); } -TEST(urInitTest, ErrorInvalidEnumerationDeviceFlags) { +TEST(urLoaderInitTest, ErrorInvalidEnumerationDeviceFlags) { const ur_device_init_flags_t device_flags = UR_DEVICE_INIT_FLAG_FORCE_UINT32; - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_ENUMERATION, - urInit(device_flags, nullptr)); + ASSERT_EQ(UR_RESULT_ERROR_INVALID_ENUMERATION, + urLoaderInit(device_flags, nullptr)); } diff --git a/test/loader/loader_lifetime/urLoaderTearDown.cpp b/test/loader/loader_lifetime/urLoaderTearDown.cpp new file mode 100644 index 0000000000..a4c3dc83fb --- /dev/null +++ b/test/loader/loader_lifetime/urLoaderTearDown.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2022-2023 Intel Corporation +// 
Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "fixtures.hpp" + +struct urLoaderTearDownTest : testing::Test { + void SetUp() override { + ur_device_init_flags_t device_flags = 0; + ASSERT_SUCCESS(urLoaderInit(device_flags, nullptr)); + } +}; + +TEST_F(urLoaderTearDownTest, Success) { ASSERT_SUCCESS(urLoaderTearDown()); } diff --git a/test/loader/platforms/no_platforms.match b/test/loader/platforms/no_platforms.match index da17800c0c..b695672e4d 100644 --- a/test/loader/platforms/no_platforms.match +++ b/test/loader/platforms/no_platforms.match @@ -1,2 +1,2 @@ -[INFO]: urInit succeeded. +[INFO]: urLoaderInit succeeded. [INFO]: urPlatformGet found 0 platforms diff --git a/test/loader/platforms/null_platform.match b/test/loader/platforms/null_platform.match index 6c7d8a97f4..29cadc78b5 100644 --- a/test/loader/platforms/null_platform.match +++ b/test/loader/platforms/null_platform.match @@ -1,3 +1,3 @@ -[INFO]: urInit succeeded. +[INFO]: urLoaderInit succeeded. 
[INFO]: urPlatformGet found 1 platforms -[INFO]: Found UR_PLATFORM_NULL \ No newline at end of file +[INFO]: Found UR_PLATFORM_NULL diff --git a/test/loader/platforms/platforms.cpp b/test/loader/platforms/platforms.cpp index bb4f8fb79d..164441d37d 100644 --- a/test/loader/platforms/platforms.cpp +++ b/test/loader/platforms/platforms.cpp @@ -11,38 +11,38 @@ #include #include -#include +#include #include "ur_api.h" using namespace logger; ////////////////////////////////////////////////////////////////////////// -int main(int argc, char *argv[]) { +int main(int, char *[]) { auto out = create_logger("TEST"); ur_result_t status; // Initialize the platform - status = urInit(0, nullptr); + status = urLoaderInit(0, nullptr); if (status != UR_RESULT_SUCCESS) { - out.error("urInit failed with return code: {}", status); + out.error("urLoaderInit failed with return code: {}", status); return 1; } - out.info("urInit succeeded."); + out.info("urLoaderInit succeeded."); uint32_t adapterCount = 0; std::vector adapters; status = urAdapterGet(0, nullptr, &adapterCount); if (status != UR_RESULT_SUCCESS) { - error("urAdapterGet failed with return code: {}", status); + out.error("urAdapterGet failed with return code: {}", status); return 1; } adapters.resize(adapterCount); status = urAdapterGet(adapterCount, adapters.data(), nullptr); if (status != UR_RESULT_SUCCESS) { - error("urAdapterGet failed with return code: {}", status); + out.error("urAdapterGet failed with return code: {}", status); return 1; } @@ -89,6 +89,6 @@ int main(int argc, char *argv[]) { free(name); } out: - urTearDown(nullptr); + urLoaderTearDown(); return status == UR_RESULT_SUCCESS ? 0 : 1; } diff --git a/test/tools/urtrace/null_hello.match b/test/tools/urtrace/null_hello.match index b58a4d8d96..54c6efb9cb 100644 --- a/test/tools/urtrace/null_hello.match +++ b/test/tools/urtrace/null_hello.match @@ -1,4 +1,3 @@ -urInit(.device_flags = 0, .hLoaderConfig = nullptr) -> UR_RESULT_SUCCESS; Platform initialized. 
urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (1)) -> UR_RESULT_SUCCESS; urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr) -> UR_RESULT_SUCCESS; @@ -12,4 +11,3 @@ urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = {{.*}}, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; Found a Null Device gpu. urAdapterRelease(.hAdapter = {{.*}}) -> UR_RESULT_SUCCESS; -urTearDown(.pParams = nullptr) -> UR_RESULT_SUCCESS; diff --git a/test/tools/urtrace/null_hello_begin.match b/test/tools/urtrace/null_hello_begin.match index 81c15da60f..bf2d85145a 100644 --- a/test/tools/urtrace/null_hello_begin.match +++ b/test/tools/urtrace/null_hello_begin.match @@ -1,27 +1,23 @@ -begin(1) - urInit(.device_flags = 0, .hLoaderConfig = nullptr); -end(1) - urInit(.device_flags = 0, .hLoaderConfig = nullptr) -> UR_RESULT_SUCCESS; Platform initialized. 
-begin(2) - urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (0)); -end(2) - urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (1)) -> UR_RESULT_SUCCESS; -begin(3) - urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr); -end(3) - urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr) -> UR_RESULT_SUCCESS; -begin(4) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {}, .pNumPlatforms = {{.*}} (0)); -end(4) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {}, .pNumPlatforms = {{.*}} (1)) -> UR_RESULT_SUCCESS; -begin(5) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {nullptr}, .pNumPlatforms = nullptr); -end(5) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {{{.*}}}, .pNumPlatforms = nullptr) -> UR_RESULT_SUCCESS; -begin(6) - urPlatformGetApiVersion(.hPlatform = {{.*}}, .pVersion = {{.*}} (0.0)); -end(6) - urPlatformGetApiVersion(.hPlatform = {{.*}}, .pVersion = {{.*}} (@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@)) -> UR_RESULT_SUCCESS; +begin(1) - urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (0)); +end(1) - urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (1)) -> UR_RESULT_SUCCESS; +begin(2) - urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr); +end(2) - urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr) -> UR_RESULT_SUCCESS; +begin(3) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {}, .pNumPlatforms = {{.*}} (0)); +end(3) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {}, .pNumPlatforms = {{.*}} (1)) -> UR_RESULT_SUCCESS; +begin(4) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = 
{nullptr}, .pNumPlatforms = nullptr); +end(4) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {{{.*}}}, .pNumPlatforms = nullptr) -> UR_RESULT_SUCCESS; +begin(5) - urPlatformGetApiVersion(.hPlatform = {{.*}}, .pVersion = {{.*}} (0.0)); +end(5) - urPlatformGetApiVersion(.hPlatform = {{.*}}, .pVersion = {{.*}} (@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@)) -> UR_RESULT_SUCCESS; API version: {{.*}} -begin(7) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (0)); -end(7) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; -begin(8) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {nullptr}, .pNumDevices = nullptr); -end(8) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{{.*}}}, .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; -begin(9) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, .pPropValue = {{.*}}, .pPropSizeRet = nullptr); -end(9) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; -begin(10) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}}, .pPropSizeRet = nullptr); -end(10) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; +begin(6) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (0)); +end(6) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; +begin(7) - 
urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {nullptr}, .pNumDevices = nullptr); +end(7) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{{.*}}}, .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; +begin(8) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, .pPropValue = {{.*}}, .pPropSizeRet = nullptr); +end(8) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; +begin(9) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}}, .pPropSizeRet = nullptr); +end(9) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; Found a Null Device gpu. -begin(11) - urAdapterRelease(.hAdapter = {{.*}}); -end(11) - urAdapterRelease(.hAdapter = {{.*}}) -> UR_RESULT_SUCCESS; -begin(12) - urTearDown(.pParams = nullptr); -end(12) - urTearDown(.pParams = nullptr) -> UR_RESULT_SUCCESS; +begin(10) - urAdapterRelease(.hAdapter = {{.*}}); +end(10) - urAdapterRelease(.hAdapter = {{.*}}) -> UR_RESULT_SUCCESS; diff --git a/test/tools/urtrace/null_hello_json.match b/test/tools/urtrace/null_hello_json.match index 18c5fbac78..5b9377e8d6 100644 --- a/test/tools/urtrace/null_hello_json.match +++ b/test/tools/urtrace/null_hello_json.match @@ -1,6 +1,5 @@ { "traceEvents": [ -{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urInit", "args": "(.device_flags = 0, .hLoaderConfig = nullptr)" }, Platform initialized. 
{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urAdapterGet", "args": "(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (1))" }, { "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urAdapterGet", "args": "(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr)" }, @@ -14,7 +13,6 @@ API version: @PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@ { "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urDeviceGetInfo", "args": "(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}} (Null Device), .pPropSizeRet = nullptr)" }, Found a Null Device gpu. { "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urAdapterRelease", "args": "(.hAdapter = {{.*}})" }, -{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urTearDown", "args": "(.pParams = nullptr)" }, {"name": "", "cat": "", "ph": "", "pid": "", "tid": "", "ts": ""} ] } diff --git a/test/tools/urtrace/null_hello_no_args.match b/test/tools/urtrace/null_hello_no_args.match index e0afcd2868..6462f41d02 100644 --- a/test/tools/urtrace/null_hello_no_args.match +++ b/test/tools/urtrace/null_hello_no_args.match @@ -1,4 +1,3 @@ -urInit(...) -> UR_RESULT_SUCCESS; Platform initialized. urAdapterGet(...) -> UR_RESULT_SUCCESS; urAdapterGet(...) -> UR_RESULT_SUCCESS; @@ -12,4 +11,3 @@ urDeviceGetInfo(...) -> UR_RESULT_SUCCESS; urDeviceGetInfo(...) -> UR_RESULT_SUCCESS; Found a Null Device gpu. urAdapterRelease(...) -> UR_RESULT_SUCCESS; -urTearDown(...) 
-> UR_RESULT_SUCCESS; diff --git a/test/tools/urtrace/null_hello_profiling.match b/test/tools/urtrace/null_hello_profiling.match index 635c3c8784..7bd3bd53c1 100644 --- a/test/tools/urtrace/null_hello_profiling.match +++ b/test/tools/urtrace/null_hello_profiling.match @@ -1,4 +1,3 @@ -urInit(.device_flags = 0, .hLoaderConfig = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) Platform initialized. urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (1)) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) @@ -12,4 +11,3 @@ urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = {{.*}}, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) Found a Null Device gpu. urAdapterRelease(.hAdapter = {{.*}}) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) -urTearDown(.pParams = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) diff --git a/test/unified_malloc_framework/common/pool.hpp b/test/unified_malloc_framework/common/pool.hpp index 7a7b650e11..f31acf8d22 100644 --- a/test/unified_malloc_framework/common/pool.hpp +++ b/test/unified_malloc_framework/common/pool.hpp @@ -23,6 +23,7 @@ #include #include "base.hpp" +#include "provider.hpp" #include "umf_helpers.hpp" namespace umf_test { @@ -31,6 +32,17 @@ auto wrapPoolUnique(umf_memory_pool_handle_t hPool) { return umf::pool_unique_handle_t(hPool, &umfPoolDestroy); } +template +auto makePoolWithOOMProvider(int allocNum, Args &&...args) { + auto [ret, provider] = + umf::memoryProviderMakeUnique(allocNum); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + auto [retp, pool] = umf::poolMakeUnique( + {std::move(provider)}, std::forward(args)...); + EXPECT_EQ(retp, UMF_RESULT_SUCCESS); + return std::move(pool); +} + bool isReallocSupported(umf_memory_pool_handle_t hPool) { static constexpr size_t allocSize = 
8; bool supported; @@ -76,7 +88,7 @@ struct pool_base { umf_result_t initialize(umf_memory_provider_handle_t *, size_t) noexcept { return UMF_RESULT_SUCCESS; }; - void *malloc(size_t size) noexcept { return nullptr; } + void *malloc([[maybe_unused]] size_t size) noexcept { return nullptr; } void *calloc(size_t, size_t) noexcept { return nullptr; } void *realloc(void *, size_t) noexcept { return nullptr; } void *aligned_malloc(size_t, size_t) noexcept { return nullptr; } @@ -120,7 +132,7 @@ struct malloc_pool : public pool_base { struct proxy_pool : public pool_base { umf_result_t initialize(umf_memory_provider_handle_t *providers, - size_t numProviders) noexcept { + [[maybe_unused]] size_t numProviders) noexcept { this->provider = providers[0]; return UMF_RESULT_SUCCESS; } @@ -128,15 +140,17 @@ struct proxy_pool : public pool_base { void *calloc(size_t num, size_t size) noexcept { void *ptr; auto ret = umfMemoryProviderAlloc(provider, num * size, 0, &ptr); + umf::getPoolLastStatusRef() = ret; - memset(ptr, 0, num * size); - - if (ptr) { - EXPECT_EQ_NOEXCEPT(ret, UMF_RESULT_SUCCESS); + if (!ptr) { + return ptr; } + + memset(ptr, 0, num * size); return ptr; } - void *realloc(void *ptr, size_t size) noexcept { + void *realloc([[maybe_unused]] void *ptr, + [[maybe_unused]] size_t size) noexcept { // TODO: not supported umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; @@ -145,18 +159,15 @@ struct proxy_pool : public pool_base { void *aligned_malloc(size_t size, size_t alignment) noexcept { void *ptr; auto ret = umfMemoryProviderAlloc(provider, size, alignment, &ptr); - if (ptr) { - EXPECT_EQ_NOEXCEPT(ret, UMF_RESULT_SUCCESS); - } + umf::getPoolLastStatusRef() = ret; return ptr; } - size_t malloc_usable_size(void *ptr) noexcept { + size_t malloc_usable_size([[maybe_unused]] void *ptr) noexcept { // TODO: not supported return 0; } enum umf_result_t free(void *ptr) noexcept { auto ret = umfMemoryProviderFree(provider, ptr, 0); - EXPECT_EQ_NOEXCEPT(ret, 
UMF_RESULT_SUCCESS); return ret; } enum umf_result_t get_last_allocation_error() { diff --git a/test/unified_malloc_framework/common/provider.c b/test/unified_malloc_framework/common/provider.c index 8f9e946bfc..303d8aea8d 100644 --- a/test/unified_malloc_framework/common/provider.c +++ b/test/unified_malloc_framework/common/provider.c @@ -23,7 +23,7 @@ static enum umf_result_t nullAlloc(void *provider, size_t size, (void)provider; (void)size; (void)alignment; - (void)ptr; + *ptr = NULL; return UMF_RESULT_SUCCESS; } diff --git a/test/unified_malloc_framework/common/provider.hpp b/test/unified_malloc_framework/common/provider.hpp index 518b2b0528..6b121e39f1 100644 --- a/test/unified_malloc_framework/common/provider.hpp +++ b/test/unified_malloc_framework/common/provider.hpp @@ -30,21 +30,27 @@ struct provider_base { enum umf_result_t alloc(size_t, size_t, void **) noexcept { return UMF_RESULT_ERROR_UNKNOWN; } - enum umf_result_t free(void *ptr, size_t size) noexcept { + enum umf_result_t free([[maybe_unused]] void *ptr, + [[maybe_unused]] size_t size) noexcept { return UMF_RESULT_ERROR_UNKNOWN; } void get_last_native_error(const char **, int32_t *) noexcept {} - enum umf_result_t get_recommended_page_size(size_t size, - size_t *pageSize) noexcept { + enum umf_result_t + get_recommended_page_size([[maybe_unused]] size_t size, + [[maybe_unused]] size_t *pageSize) noexcept { return UMF_RESULT_ERROR_UNKNOWN; } - enum umf_result_t get_min_page_size(void *ptr, size_t *pageSize) noexcept { + enum umf_result_t + get_min_page_size([[maybe_unused]] void *ptr, + [[maybe_unused]] size_t *pageSize) noexcept { return UMF_RESULT_ERROR_UNKNOWN; } - enum umf_result_t purge_lazy(void *ptr, size_t size) noexcept { + enum umf_result_t purge_lazy([[maybe_unused]] void *ptr, + [[maybe_unused]] size_t size) noexcept { return UMF_RESULT_ERROR_UNKNOWN; } - enum umf_result_t purge_force(void *ptr, size_t size) noexcept { + enum umf_result_t purge_force([[maybe_unused]] void *ptr, + 
[[maybe_unused]] size_t size) noexcept { return UMF_RESULT_ERROR_UNKNOWN; } const char *get_name() noexcept { return "base"; } @@ -76,6 +82,28 @@ struct provider_malloc : public provider_base { const char *get_name() noexcept { return "malloc"; } }; +struct provider_mock_out_of_mem : public provider_base { + provider_malloc helper_prov; + int allocNum = 0; + umf_result_t initialize(int allocNum) noexcept { + this->allocNum = allocNum; + return UMF_RESULT_SUCCESS; + } + enum umf_result_t alloc(size_t size, size_t align, void **ptr) noexcept { + if (allocNum <= 0) { + *ptr = nullptr; + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + allocNum--; + + return helper_prov.alloc(size, align, ptr); + } + enum umf_result_t free(void *ptr, size_t size) noexcept { + return helper_prov.free(ptr, size); + } + const char *get_name() noexcept { return "mock_out_of_mem"; } +}; + } // namespace umf_test #endif /* UMF_TEST_PROVIDER_HPP */ diff --git a/test/unified_malloc_framework/memoryPool.hpp b/test/unified_malloc_framework/memoryPool.hpp index fde5954cf8..ab923932fb 100644 --- a/test/unified_malloc_framework/memoryPool.hpp +++ b/test/unified_malloc_framework/memoryPool.hpp @@ -3,7 +3,9 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "disjoint_pool.hpp" #include "pool.hpp" +#include "provider.hpp" #include #include @@ -53,6 +55,29 @@ struct umfMultiPoolTest : umfPoolTest { std::vector pools; }; +struct umfMemTest + : umf_test::test, + ::testing::WithParamInterface< + std::tuple, int>> { + umfMemTest() : pool(nullptr, nullptr), expectedRecycledPoolAllocs(0) {} + void SetUp() override { + test::SetUp(); + initialize(); + } + + void TearDown() override { test::TearDown(); } + + void initialize() { + auto [pool_fun, expectedRecycledPoolAllocs] = this->GetParam(); + EXPECT_NE(pool_fun(), nullptr); + this->pool = pool_fun(); + this->expectedRecycledPoolAllocs = expectedRecycledPoolAllocs; + } + + umf::pool_unique_handle_t pool; + int 
expectedRecycledPoolAllocs; +}; + TEST_P(umfPoolTest, allocFree) { static constexpr size_t allocSize = 64; auto *ptr = umfPoolMalloc(pool.get(), allocSize); @@ -251,6 +276,46 @@ TEST_P(umfPoolTest, multiThreadedMallocFreeRandomSizes) { } } +TEST_P(umfMemTest, outOfMem) { + static constexpr size_t allocSize = 4096; + auto hPool = pool.get(); + + std::vector allocations; + + while (true) { + allocations.emplace_back(umfPoolMalloc(hPool, allocSize)); + if (allocations.back() == nullptr && + umfPoolGetLastAllocationError(hPool) == + UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY) { + break; + } + ASSERT_NE(allocations.back(), nullptr); + } + + // next part of the test- freeing some memory to allocate it again (as the memory + // should be acquired from the pool itself now, not from the provider), + // is done only for the disjoint pool for now + + // remove last nullptr from the allocations vector + ASSERT_EQ(allocations.back(), nullptr); + allocations.pop_back(); + + ASSERT_NE(allocations.back(), nullptr); + for (int i = 0; i < expectedRecycledPoolAllocs; i++) { + umfPoolFree(hPool, allocations.back()); + allocations.pop_back(); + } + + for (int i = 0; i < expectedRecycledPoolAllocs; i++) { + allocations.emplace_back(umfPoolMalloc(hPool, allocSize)); + ASSERT_NE(allocations.back(), nullptr); + } + + for (auto allocation : allocations) { + umfPoolFree(hPool, allocation); + } +} + #ifdef UMF_ENABLE_POOL_TRACKING_TESTS // TODO: add similar tests for realloc/aligned_alloc, etc. 
// TODO: add multithreaded tests diff --git a/test/unified_malloc_framework/memoryPoolAPI.cpp b/test/unified_malloc_framework/memoryPoolAPI.cpp index d40254fbf0..82d3768611 100644 --- a/test/unified_malloc_framework/memoryPoolAPI.cpp +++ b/test/unified_malloc_framework/memoryPoolAPI.cpp @@ -82,7 +82,7 @@ TEST_F(test, memoryPoolTrace) { ASSERT_EQ(providerCalls.size(), provider_call_count); ret = umfPoolGetLastAllocationError(tracingPool.get()); - ASSERT_EQ(ret, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_EQ(poolCalls["get_last_native_error"], 1); ASSERT_EQ(poolCalls.size(), ++pool_call_count); @@ -157,6 +157,14 @@ INSTANTIATE_TEST_SUITE_P( .second; })); +INSTANTIATE_TEST_SUITE_P( + proxyPoolOOMTest, umfMemTest, + ::testing::Values(std::tuple( + [] { + return umf_test::makePoolWithOOMProvider(10); + }, + 0))); + ////////////////// Negative test cases ///////////////// TEST_F(test, memoryPoolInvalidProvidersNullptr) { @@ -187,9 +195,10 @@ TEST_P(poolInitializeTest, errorPropagation) { umf_memory_provider_handle_t providers[] = {nullProvider.get()}; struct pool : public umf_test::pool_base { - umf_result_t initialize(umf_memory_provider_handle_t *providers, - size_t numProviders, - umf_result_t errorToReturn) noexcept { + umf_result_t + initialize([[maybe_unused]] umf_memory_provider_handle_t *providers, + [[maybe_unused]] size_t numProviders, + umf_result_t errorToReturn) noexcept { return errorToReturn; } }; @@ -232,7 +241,8 @@ TEST_F(test, getLastFailedMemoryProvider) { return allocResult; } - enum umf_result_t free(void *ptr, size_t size) noexcept { + enum umf_result_t free(void *ptr, + [[maybe_unused]] size_t size) noexcept { ::free(ptr); return UMF_RESULT_SUCCESS; } @@ -254,10 +264,8 @@ TEST_F(test, getLastFailedMemoryProvider) { auto [ret, pool] = umf::poolMakeUnique(&hProvider, 1); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - ASSERT_EQ(umfGetLastFailedMemoryProvider(), nullptr); auto ptr = umfPoolMalloc(pool.get(), allocSize); 
ASSERT_NE(ptr, nullptr); - ASSERT_EQ(umfGetLastFailedMemoryProvider(), nullptr); umfPoolFree(pool.get(), ptr); // make provider return an error during allocation diff --git a/test/unified_malloc_framework/memoryProviderAPI.cpp b/test/unified_malloc_framework/memoryProviderAPI.cpp index fa02f9eb99..02a7fa357d 100644 --- a/test/unified_malloc_framework/memoryProviderAPI.cpp +++ b/test/unified_malloc_framework/memoryProviderAPI.cpp @@ -23,7 +23,8 @@ TEST_F(test, memoryProviderTrace) { size_t call_count = 0; - auto ret = umfMemoryProviderAlloc(tracingProvider.get(), 0, 0, nullptr); + void *ptr; + auto ret = umfMemoryProviderAlloc(tracingProvider.get(), 0, 0, &ptr); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_EQ(calls["alloc"], 1); ASSERT_EQ(calls.size(), ++call_count); diff --git a/test/unified_malloc_framework/umf_pools/disjoint_pool.cpp b/test/unified_malloc_framework/umf_pools/disjoint_pool.cpp index 9e4d4f7ee6..0e81342bef 100644 --- a/test/unified_malloc_framework/umf_pools/disjoint_pool.cpp +++ b/test/unified_malloc_framework/umf_pools/disjoint_pool.cpp @@ -11,6 +11,7 @@ #include "disjoint_pool.hpp" #include "memoryPool.hpp" +#include "pool.hpp" #include "provider.h" #include "provider.hpp" @@ -42,7 +43,8 @@ TEST_F(test, freeErrorPropagation) { *ptr = malloc(size); return UMF_RESULT_SUCCESS; } - enum umf_result_t free(void *ptr, size_t size) noexcept { + enum umf_result_t free(void *ptr, + [[maybe_unused]] size_t size) noexcept { ::free(ptr); return freeReturn; } @@ -72,6 +74,15 @@ TEST_F(test, freeErrorPropagation) { INSTANTIATE_TEST_SUITE_P(disjointPoolTests, umfPoolTest, ::testing::Values(makePool)); +INSTANTIATE_TEST_SUITE_P( + disjointPoolTests, umfMemTest, + ::testing::Values(std::make_tuple( + [] { + return umf_test::makePoolWithOOMProvider( + static_cast(poolConfig().Capacity), poolConfig()); + }, + static_cast(poolConfig().Capacity) / 2))); + GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfMultiPoolTest); INSTANTIATE_TEST_SUITE_P(disjointMultiPoolTests, 
umfMultiPoolTest, ::testing::Values(makePool)); diff --git a/test/unit/utils/params.cpp b/test/unit/utils/params.cpp index 964d117e49..6b687e6878 100644 --- a/test/unit/utils/params.cpp +++ b/test/unit/utils/params.cpp @@ -10,37 +10,37 @@ #include #include "ur_api.h" -#include "ur_params.hpp" +#include "ur_print.hpp" template class ParamsTest : public testing::Test { protected: T params; }; -struct UrInitParams { - ur_init_params_t params; +struct UrLoaderInitParams { + ur_loader_init_params_t params; ur_device_init_flags_t flags; ur_loader_config_handle_t config; - UrInitParams(ur_device_init_flags_t _flags) + UrLoaderInitParams(ur_device_init_flags_t _flags) : flags(_flags), config(nullptr) { params.pdevice_flags = &flags; params.phLoaderConfig = &config; } - ur_init_params_t *get_struct() { return ¶ms; } + ur_loader_init_params_t *get_struct() { return ¶ms; } }; -struct UrInitParamsNoFlags : UrInitParams { - UrInitParamsNoFlags() : UrInitParams(0) {} +struct UrLoaderInitParamsNoFlags : UrLoaderInitParams { + UrLoaderInitParamsNoFlags() : UrLoaderInitParams(0) {} const char *get_expected() { return ".device_flags = 0, .hLoaderConfig = nullptr"; }; }; -struct UrInitParamsInvalidFlags : UrInitParams { - UrInitParamsInvalidFlags() - : UrInitParams(UR_DEVICE_INIT_FLAG_GPU | UR_DEVICE_INIT_FLAG_MCA | - UR_BIT(25) | UR_BIT(30) | UR_BIT(31)) {} +struct UrLoaderInitParamsInvalidFlags : UrLoaderInitParams { + UrLoaderInitParamsInvalidFlags() + : UrLoaderInitParams(UR_DEVICE_INIT_FLAG_GPU | UR_DEVICE_INIT_FLAG_MCA | + UR_BIT(25) | UR_BIT(30) | UR_BIT(31)) {} const char *get_expected() { return ".device_flags = UR_DEVICE_INIT_FLAG_GPU \\| " "UR_DEVICE_INIT_FLAG_MCA \\| unknown bit flags " @@ -367,34 +367,59 @@ struct UrDevicePartitionPropertyTest { ur_device_partition_property_t prop; }; +struct UrSamplerAddressModesTest { + UrSamplerAddressModesTest() { + prop.addrModes[0] = UR_SAMPLER_ADDRESSING_MODE_CLAMP; + prop.addrModes[1] = UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT; 
+ prop.addrModes[2] = UR_SAMPLER_ADDRESSING_MODE_REPEAT; + prop.pNext = nullptr; + prop.stype = UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES; + } + ur_exp_sampler_addr_modes_t &get_struct() { return prop; } + const char *get_expected() { + return "\\(struct ur_exp_sampler_addr_modes_t\\)" + "\\{" + ".stype = UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES, " + ".pNext = nullptr, " + ".addrModes = \\{" + "UR_SAMPLER_ADDRESSING_MODE_CLAMP, " + "UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT, " + "UR_SAMPLER_ADDRESSING_MODE_REPEAT" + "\\}" + "\\}"; + } + + ur_exp_sampler_addr_modes_t prop; +}; + using testing::Types; -typedef Types< - UrInitParamsNoFlags, UrInitParamsInvalidFlags, UrUsmHostAllocParamsEmpty, - UrPlatformGetEmptyArray, UrPlatformGetTwoPlatforms, - UrUsmHostAllocParamsUsmDesc, UrUsmHostAllocParamsHostDesc, - UrDeviceGetInfoParamsEmpty, UrDeviceGetInfoParamsName, - UrDeviceGetInfoParamsQueueFlag, UrDeviceGetInfoParamsPartitionArray, - UrContextGetInfoParamsDevicesArray, UrDeviceGetInfoParamsInvalidSize, - UrProgramMetadataTest, UrDevicePartitionPropertyTest> +typedef Types Implementations; using ::testing::MatchesRegex; -using namespace ur_params; TYPED_TEST_SUITE(ParamsTest, Implementations, ); -TYPED_TEST(ParamsTest, Serialize) { +TYPED_TEST(ParamsTest, Print) { std::ostringstream out; out << this->params.get_struct(); EXPECT_THAT(out.str(), MatchesRegex(this->params.get_expected())); } -TEST(SerializePtr, nested_void_ptrs) { +TEST(PrintPtr, nested_void_ptrs) { void *real = (void *)0xFEEDCAFEull; void **preal = ℜ void ***ppreal = &preal; void ****pppreal = &ppreal; std::ostringstream out; - serializePtr(out, pppreal); + ur::details::printPtr(out, pppreal); EXPECT_THAT(out.str(), MatchesRegex(".+ \\(.+ \\(.+ \\(.+\\)\\)\\)")); } diff --git a/test/usm/CMakeLists.txt b/test/usm/CMakeLists.txt index b673b6d1b9..fa5454d4db 100644 --- a/test/usm/CMakeLists.txt +++ b/test/usm/CMakeLists.txt @@ -10,6 +10,8 @@ function(add_usm_test name) add_ur_executable(${TEST_TARGET_NAME} 
${UR_USM_TEST_DIR}/../conformance/source/environment.cpp ${UR_USM_TEST_DIR}/../conformance/source/main.cpp + ${UR_USM_TEST_DIR}/../unified_malloc_framework/common/provider.c + ${UR_USM_TEST_DIR}/../unified_malloc_framework/common/pool.c ${ARGN}) target_link_libraries(${TEST_TARGET_NAME} PRIVATE @@ -17,10 +19,12 @@ function(add_usm_test name) ${PROJECT_NAME}::loader ur_testing GTest::gtest_main) - add_test(NAME usm-${name} + add_test(NAME usm-${name} COMMAND ${TEST_TARGET_NAME} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - set_tests_properties(usm-${name} PROPERTIES LABELS "usm") + set_tests_properties(usm-${name} PROPERTIES + LABELS "usm" + ENVIRONMENT "UR_ADAPTERS_FORCE_LOAD=\"$\"") target_compile_definitions("usm_test-${name}" PRIVATE DEVICES_ENVIRONMENT) endfunction() diff --git a/test/usm/usmPoolManager.cpp b/test/usm/usmPoolManager.cpp index eaf44e119d..6d2eb33bfe 100644 --- a/test/usm/usmPoolManager.cpp +++ b/test/usm/usmPoolManager.cpp @@ -3,19 +3,18 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "../unified_malloc_framework/common/pool.hpp" -#include "../unified_malloc_framework/common/provider.hpp" #include "ur_pool_manager.hpp" -#include +#include "../unified_malloc_framework/common/pool.h" +#include "../unified_malloc_framework/common/provider.h" -#include +#include -struct urUsmPoolManagerTest +struct urUsmPoolDescriptorTest : public uur::urMultiDeviceContextTest, ::testing::WithParamInterface {}; -TEST_P(urUsmPoolManagerTest, poolIsPerContextTypeAndDevice) { +TEST_P(urUsmPoolDescriptorTest, poolIsPerContextTypeAndDevice) { auto &devices = uur::DevicesEnvironment::instance->devices; auto poolHandle = this->GetParam(); @@ -49,7 +48,71 @@ TEST_P(urUsmPoolManagerTest, poolIsPerContextTypeAndDevice) { ASSERT_EQ(sharedPools, devices.size() * 2); } -INSTANTIATE_TEST_SUITE_P(urUsmPoolManagerTest, urUsmPoolManagerTest, +INSTANTIATE_TEST_SUITE_P(urUsmPoolDescriptorTest, urUsmPoolDescriptorTest, 
::testing::Values(nullptr)); // TODO: add test with sub-devices + +struct urUsmPoolManagerTest : public uur::urContextTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urContextTest::SetUp()); + auto [ret, descs] = usm::pool_descriptor::create(nullptr, context); + ASSERT_EQ(ret, UR_RESULT_SUCCESS); + poolDescriptors = std::move(descs); + } + + std::vector poolDescriptors; +}; + +TEST_P(urUsmPoolManagerTest, poolManagerPopulate) { + auto [ret, manager] = usm::pool_manager::create(); + ASSERT_EQ(ret, UR_RESULT_SUCCESS); + + for (auto &desc : poolDescriptors) { + // Populate the pool manager + auto pool = nullPoolCreate(); + ASSERT_NE(pool, nullptr); + auto poolUnique = umf::pool_unique_handle_t(pool, umfPoolDestroy); + ASSERT_NE(poolUnique, nullptr); + ret = manager.addPool(desc, poolUnique); + ASSERT_EQ(ret, UR_RESULT_SUCCESS); + } + + for (auto &desc : poolDescriptors) { + // Confirm that there is a pool for each descriptor + auto hPoolOpt = manager.getPool(desc); + ASSERT_TRUE(hPoolOpt.has_value()); + ASSERT_NE(hPoolOpt.value(), nullptr); + } +} + +TEST_P(urUsmPoolManagerTest, poolManagerInsertExisting) { + auto [ret, manager] = usm::pool_manager::create(); + ASSERT_EQ(ret, UR_RESULT_SUCCESS); + + const auto &desc = poolDescriptors[0]; + + auto pool = nullPoolCreate(); + ASSERT_NE(pool, nullptr); + auto poolUnique = umf::pool_unique_handle_t(pool, umfPoolDestroy); + ASSERT_NE(poolUnique, nullptr); + + ret = manager.addPool(desc, poolUnique); + ASSERT_EQ(ret, UR_RESULT_SUCCESS); + + // Inserting an existing key should return an error + ret = manager.addPool(desc, poolUnique); + ASSERT_EQ(ret, UR_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(urUsmPoolManagerTest, poolManagerGetNonexistant) { + auto [ret, manager] = usm::pool_manager::create(); + ASSERT_EQ(ret, UR_RESULT_SUCCESS); + + for (auto &desc : poolDescriptors) { + auto hPool = manager.getPool(desc); + ASSERT_FALSE(hPool.has_value()); + } +} + 
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urUsmPoolManagerTest); diff --git a/third_party/requirements.txt b/third_party/requirements.txt index 3628039ede..9aff32b1a4 100644 --- a/third_party/requirements.txt +++ b/third_party/requirements.txt @@ -12,7 +12,7 @@ exhale==0.3.0 idna==2.8 imagesize==1.1.0 Jinja2==2.11.3 -lxml==4.9.1 +lxml==4.9.3 Mako==1.1.0 MarkupSafe==1.1.1 packaging==19.2 diff --git a/tools/urinfo/urinfo.cpp b/tools/urinfo/urinfo.cpp index 8002cf186b..d1463ea4fa 100644 --- a/tools/urinfo/urinfo.cpp +++ b/tools/urinfo/urinfo.cpp @@ -26,7 +26,7 @@ struct app { UR_CHECK(urLoaderConfigCreate(&loaderConfig)); UR_CHECK(urLoaderConfigEnableLayer(loaderConfig, "UR_LAYER_FULL_VALIDATION")); - UR_CHECK(urInit(0, loaderConfig)); + UR_CHECK(urLoaderInit(0, loaderConfig)); enumerateDevices(); } @@ -174,7 +174,7 @@ devices which are currently visible in the local execution environment. ~app() { urLoaderConfigRelease(loaderConfig); - urTearDown(nullptr); + urLoaderTearDown(); } }; } // namespace urinfo diff --git a/tools/urinfo/utils.hpp b/tools/urinfo/utils.hpp index bbbd327cb9..d2e26255c2 100644 --- a/tools/urinfo/utils.hpp +++ b/tools/urinfo/utils.hpp @@ -6,7 +6,7 @@ #pragma once #include "ur_api.h" -#include "ur_params.hpp" +#include "ur_print.hpp" #include #include #include diff --git a/tools/urtrace/collector.cpp b/tools/urtrace/collector.cpp index b502f0d802..a78cb82d08 100644 --- a/tools/urtrace/collector.cpp +++ b/tools/urtrace/collector.cpp @@ -28,7 +28,7 @@ #include "logger/ur_logger.hpp" #include "ur_api.h" -#include "ur_params.hpp" +#include "ur_print.hpp" #include "ur_util.hpp" #include "xpti/xpti_trace_framework.h" @@ -245,10 +245,10 @@ class JsonWriter : public TraceWriter { "\"tid\": \"\", \"ts\": \"\"}}"); out.info("]\n}}"); } - void begin(uint64_t id, const char *fname, std::string args) override {} + void begin(uint64_t, const char *, std::string) override {} - void end(uint64_t id, const char *fname, std::string args, Timepoint tp, - Timepoint 
start_tp, const ur_result_t *resultp) override { + void end(uint64_t, const char *fname, std::string args, Timepoint tp, + Timepoint start_tp, const ur_result_t *) override { auto dur = tp - start_tp; auto ts_us = std::chrono::duration_cast( tp.time_since_epoch()) @@ -288,8 +288,6 @@ static std::unique_ptr &writer() { return writer; } -using namespace ur_params; - struct fn_context { uint64_t instance; std::optional start; @@ -314,10 +312,9 @@ std::optional pop_instance_data(uint64_t instance) { return data; } -XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, - xpti::trace_event_data_t *parent, - xpti::trace_event_data_t *event, - uint64_t instance, const void *user_data) { +XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, xpti::trace_event_data_t *, + xpti::trace_event_data_t *, uint64_t instance, + const void *user_data) { // stop the the clock as the very first thing, only used for TRACE_FN_END auto time_for_end = Clock::now(); auto *args = static_cast(user_data); @@ -334,8 +331,8 @@ XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, if (cli_args.no_args) { args_str << "..."; } else { - ur_params::serializeFunctionParams(args_str, args->function_id, - args->args_data); + ur::extras::printFunctionParams( + args_str, (enum ur_function_t)args->function_id, args->args_data); } if (trace_type == TRACE_FN_BEGIN) { @@ -366,8 +363,7 @@ XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, * Called for every stream. */ XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version, - unsigned int minor_version, - const char *version_str, + unsigned int minor_version, const char *, const char *stream_name) { if (stream_name == nullptr) { out.debug("Found stream with null name. Skipping...");