Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial WebGPU EP checkin #22318

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ option(onnxruntime_TVM_USE_LLVM "Build TVM with LLVM. Set customized path to llv
option(onnxruntime_TVM_USE_HASH "Build ipp-crypto library for support hash algorithm. It is defined for TVM only")
option(onnxruntime_USE_XNNPACK "Build with XNNPACK support. Provides an alternative math library on ARM, WebAssembly and x86." OFF)
option(onnxruntime_USE_WEBNN "Build with WebNN support. Enable hardware acceleration in web browsers." OFF)
option(onnxruntime_USE_WEBGPU "Build with WebGPU support. Enable WebGPU via C/C++ interface." OFF)

# Options related to reducing the binary size produced by the build
# XNNPACK EP requires the internal NHWC contrib ops to be available, so this option must be OFF when onnxruntime_USE_XNNPACK is ON
Expand Down Expand Up @@ -910,6 +911,11 @@ if (onnxruntime_USE_WEBNN)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WEBNN=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES webnn)
endif()
if (onnxruntime_USE_WEBGPU)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_WEBGPU=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WEBGPU=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES webgpu)
endif()
if (onnxruntime_USE_CANN)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_CANN=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CANN=1)
Expand Down
84 changes: 68 additions & 16 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -576,10 +576,11 @@ if (onnxruntime_USE_MIMALLOC)
onnxruntime_fetchcontent_makeavailable(mimalloc)
endif()

#onnxruntime_EXTERNAL_LIBRARIES could contain onnx, onnx_proto,libprotobuf, cuda/cudnn,
# dnnl/mklml, onnxruntime_codegen_tvm, tvm and pthread
# pthread is always at the last
set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${WIL_TARGET} nlohmann_json::nlohmann_json onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date ${ONNXRUNTIME_CLOG_TARGET_NAME})
set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${WIL_TARGET} nlohmann_json::nlohmann_json
onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface
flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date
${ONNXRUNTIME_CLOG_TARGET_NAME})

# The source code of onnx_proto is generated, we must build this lib first before starting to compile the other source code that uses ONNX protobuf types.
# The other libs do not have the problem. All the sources are already there. We can compile them in any order.
set(onnxruntime_EXTERNAL_DEPENDENCIES onnx_proto flatbuffers::flatbuffers)
Expand Down Expand Up @@ -634,24 +635,75 @@ if (onnxruntime_USE_COREML)
FetchContent_Populate(coremltools)
endif()

message(STATUS "Finished fetching external dependencies")
if (onnxruntime_USE_WEBGPU)
FetchContent_Declare(
dawn
URL ${DEP_URL_dawn}
URL_HASH SHA1=${DEP_SHA1_dawn}
PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn.patch
)

# use dawn::dawn_native and dawn::dawn_proc instead of the monolithic dawn::webgpu_dawn to minimize binary size
set(DAWN_BUILD_MONOLITHIC_LIBRARY OFF CACHE BOOL "" FORCE)
set(DAWN_BUILD_SAMPLES OFF CACHE BOOL "" FORCE)
set(DAWN_ENABLE_INSTALL OFF CACHE BOOL "" FORCE)
set(DAWN_ENABLE_NULL OFF CACHE BOOL "" FORCE)
set(DAWN_FETCH_DEPENDENCIES ON CACHE BOOL "" FORCE)

# disable things we don't use
set(DAWN_DXC_ENABLE_ASSERTS_IN_NDEBUG OFF)
set(DAWN_ENABLE_DESKTOP_GL OFF CACHE BOOL "" FORCE)
set(DAWN_ENABLE_OPENGLES OFF CACHE BOOL "" FORCE)
set(DAWN_SUPPORTS_GLFW_FOR_WINDOWING OFF CACHE BOOL "" FORCE)
set(DAWN_USE_GLFW OFF CACHE BOOL "" FORCE)
set(DAWN_USE_WINDOWS_UI OFF CACHE BOOL "" FORCE)
set(DAWN_USE_X11 OFF CACHE BOOL "" FORCE)

set(TINT_BUILD_TESTS OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_CMD_TOOLS OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_GLSL_WRITER OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_GLSL_VALIDATOR OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_IR_BINARY OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_SPV_READER OFF CACHE BOOL "" FORCE) # don't need. disabling is a large binary size saving
set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE) # needed to create cache key. runtime error if not enabled.

# SPIR-V validation shouldn't be required given we're using Tint to create the SPIR-V.
if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
fs-eire marked this conversation as resolved.
Show resolved Hide resolved
set(DAWN_ENABLE_SPIRV_VALIDATION OFF CACHE BOOL "" FORCE)
endif()

if (WIN32)
# building this requires the HLSL writer to be enabled in Tint. TBD if that we need either of these to be ON.
set(DAWN_USE_BUILT_DXC ON CACHE BOOL "" FORCE)
set(TINT_BUILD_HLSL_WRITER ON CACHE BOOL "" FORCE)

# Vulkan may optionally be included in a Windows build. Exclude until we have an explicit use case that requires it.
set(DAWN_ENABLE_VULKAN OFF CACHE BOOL "" FORCE)
endif()

onnxruntime_fetchcontent_makeavailable(dawn)

set(onnxruntime_LINK_DIRS )
list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::dawn_native dawn::dawn_proc)
endif()

set(onnxruntime_LINK_DIRS)
if (onnxruntime_USE_CUDA)
find_package(CUDAToolkit REQUIRED)
find_package(CUDAToolkit REQUIRED)

if(onnxruntime_CUDNN_HOME)
file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME)
set(CUDNN_PATH ${onnxruntime_CUDNN_HOME})
endif()
include(cuDNN)
if(onnxruntime_CUDNN_HOME)
file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME)
set(CUDNN_PATH ${onnxruntime_CUDNN_HOME})
endif()

include(cuDNN)
endif()

if(onnxruntime_USE_SNPE)
include(external/find_snpe.cmake)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${SNPE_NN_LIBS})
include(external/find_snpe.cmake)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${SNPE_NN_LIBS})
endif()

FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR)
FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR)
FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR)
FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR)

message(STATUS "Finished fetching external dependencies")
69 changes: 62 additions & 7 deletions cmake/onnxruntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ function(get_c_cxx_api_headers HEADERS_VAR)

# need to add header files for enabled EPs
foreach(f ${ONNXRUNTIME_PROVIDER_NAMES})
# The header files in include/onnxruntime/core/providers/cuda directory cannot be flattened to the same directory
# The header files in include/onnxruntime/core/providers/cuda directory cannot be flattened to the same directory
# with onnxruntime_c_api.h . Most other EPs probably also do not work in this way.
if((NOT f STREQUAL cuda) AND (NOT f STREQUAL rocm))
file(GLOB _provider_headers CONFIGURE_DEPENDS
Expand Down Expand Up @@ -89,10 +89,22 @@ elseif(onnxruntime_BUILD_APPLE_FRAMEWORK)
# create Info.plist for the framework and podspec for CocoaPods (optional)
set(MACOSX_FRAMEWORK_NAME "onnxruntime")
set(MACOSX_FRAMEWORK_IDENTIFIER "com.microsoft.onnxruntime")
# Need to include CoreML as a weaklink for CocoaPods package if the EP is enabled

# Setup weak frameworks for macOS/iOS. 'weak' as the CoreML or WebGPU EPs are optionally enabled.
if(onnxruntime_USE_COREML)
set(APPLE_WEAK_FRAMEWORK "\\\"CoreML\\\"")
list(APPEND _weak_frameworks "\\\"CoreML\\\"")
endif()

if(onnxruntime_USE_WEBGPU)
list(APPEND _weak_frameworks "\\\"QuartzCore\\\"")
list(APPEND _weak_frameworks "\\\"IOSurface\\\"")
list(APPEND _weak_frameworks "\\\"Metal\\\"")
endif()

if (_weak_frameworks)
string(JOIN ", " APPLE_WEAK_FRAMEWORK ${_weak_frameworks})
endif()

set(INFO_PLIST_PATH "${CMAKE_CURRENT_BINARY_DIR}/Info.plist")
configure_file(${REPO_ROOT}/cmake/Info.plist.in ${INFO_PLIST_PATH})
configure_file(
Expand Down Expand Up @@ -200,6 +212,7 @@ set(onnxruntime_INTERNAL_LIBRARIES
${PROVIDERS_RKNPU}
${PROVIDERS_VSINPU}
${PROVIDERS_XNNPACK}
${PROVIDERS_WEBGPU}
${PROVIDERS_WEBNN}
${PROVIDERS_AZURE}
${PROVIDERS_INTERNAL_TESTING}
Expand Down Expand Up @@ -364,16 +377,58 @@ if(onnxruntime_BUILD_APPLE_FRAMEWORK)
endif()
endforeach()

# helper function that recurses to also handle static library dependencies of the ORT external libraries
set(_processed_libs) # keep track of processed libraries to skip any duplicate dependencies
function(add_symlink_for_static_lib_and_dependencies lib)
function(process cur_target)
# de-alias if applicable so a consistent target name is used
get_target_property(alias ${cur_target} ALIASED_TARGET)
if(TARGET ${alias})
set(cur_target ${alias})
endif()

if(${cur_target} IN_LIST _processed_libs OR ${cur_target} IN_LIST lib_and_dependencies)
return()
endif()

list(APPEND lib_and_dependencies ${cur_target})

get_target_property(link_libraries ${cur_target} LINK_LIBRARIES)
foreach(dependency ${link_libraries})
if(TARGET ${dependency})
process(${dependency})
endif()
endforeach()

set(lib_and_dependencies ${lib_and_dependencies} PARENT_SCOPE)
endfunction()

set(lib_and_dependencies)
process(${lib})

foreach(_target ${lib_and_dependencies})
get_target_property(type ${_target} TYPE)
if(${type} STREQUAL "STATIC_LIBRARY")
# message(STATUS "Adding symlink for ${_target}")
add_custom_command(TARGET onnxruntime POST_BUILD
COMMAND ${CMAKE_COMMAND} -E create_symlink
$<TARGET_FILE:${_target}> ${STATIC_LIB_DIR}/$<TARGET_LINKER_FILE_NAME:${_target}>)
endif()
endforeach()

list(APPEND _processed_libs ${lib_and_dependencies})
set(_processed_libs ${_processed_libs} PARENT_SCOPE)
endfunction()

# for external libraries we create a symlink to the .a file
foreach(_LIB ${onnxruntime_EXTERNAL_LIBRARIES})
if(NOT TARGET ${_LIB}) # if we didn't build from source. it may not a target
if(NOT TARGET ${_LIB}) # if we didn't build from source it may not be a target
continue()
endif()

GET_TARGET_PROPERTY(_LIB_TYPE ${_LIB} TYPE)
if(_LIB_TYPE STREQUAL "STATIC_LIBRARY")
add_custom_command(TARGET onnxruntime POST_BUILD
COMMAND ${CMAKE_COMMAND} -E create_symlink
$<TARGET_FILE:${_LIB}> ${STATIC_LIB_DIR}/$<TARGET_LINKER_FILE_NAME:${_LIB}>)
add_symlink_for_static_lib_and_dependencies(${_LIB})
endif()
endforeach()

Expand Down
5 changes: 4 additions & 1 deletion cmake/onnxruntime_nodejs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ endif()
if (onnxruntime_USE_DML)
set(NODEJS_BINDING_USE_DML "--use_dml")
endif()
if (onnxruntime_USE_WEBGPU)
set(NODEJS_BINDING_USE_WEBGPU "--use_webgpu")
endif()
if (onnxruntime_USE_TENSORRT)
set(NODEJS_BINDING_USE_TENSORRT "--use_tensorrt")
endif()
Expand All @@ -92,7 +95,7 @@ add_custom_target(js_common_npm_ci ALL
add_custom_target(nodejs_binding_wrapper ALL
COMMAND ${NPM_CLI} ci
COMMAND ${NPM_CLI} run build -- --onnxruntime-build-dir=${CMAKE_CURRENT_BINARY_DIR} --config=${CMAKE_BUILD_TYPE} --onnxruntime-generator=${CMAKE_GENERATOR}
--arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_TENSORRT}
--arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_WEBGPU} ${NODEJS_BINDING_USE_TENSORRT}
${NODEJS_BINDING_USE_COREML} ${NODEJS_BINDING_USE_QNN}
WORKING_DIRECTORY ${JS_NODE_ROOT}
COMMENT "Using cmake-js to build OnnxRuntime Node.js binding")
Expand Down
7 changes: 7 additions & 0 deletions cmake/onnxruntime_providers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ endif()
if(onnxruntime_USE_WEBNN)
set(PROVIDERS_WEBNN onnxruntime_providers_webnn)
endif()
if(onnxruntime_USE_WEBGPU)
set(PROVIDERS_WEBGPU onnxruntime_providers_webgpu)
endif()
if (onnxruntime_USE_CANN)
set(PROVIDERS_CANN onnxruntime_providers_cann)
endif()
Expand Down Expand Up @@ -151,6 +154,10 @@ if (onnxruntime_USE_WEBNN)
include(onnxruntime_providers_webnn.cmake)
endif()

if (onnxruntime_USE_WEBGPU)
include(onnxruntime_providers_webgpu.cmake)
endif()

if (onnxruntime_USE_NNAPI_BUILTIN)
include(onnxruntime_providers_nnapi.cmake)
endif()
Expand Down
5 changes: 5 additions & 0 deletions cmake/onnxruntime_providers_cpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ file(GLOB_RECURSE onnxruntime_js_contrib_ops_cc_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/contrib_ops/js/*.cc"
)

file(GLOB_RECURSE onnxruntime_webgpu_contrib_ops_cc_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/*.h"
"${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/*.cc"
)

file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/*.cc"
Expand Down
27 changes: 27 additions & 0 deletions cmake/onnxruntime_providers_webgpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
message(FATAL_ERROR "WebGPU EP can not be used in a basic minimal build. Please build with '--minimal_build extended'")
endif()

add_compile_definitions(USE_WEBGPU=1)
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
add_definitions(-DENABLE_WEBASSEMBLY_THREADS=1)
endif()
file(GLOB_RECURSE onnxruntime_providers_webgpu_cc_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.cc"
)
if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_webgpu_contrib_ops_cc_srcs})
list(APPEND onnxruntime_providers_webgpu_cc_srcs ${onnxruntime_webgpu_contrib_ops_cc_srcs})
endif()

source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_providers_webgpu_cc_srcs})
onnxruntime_add_static_library(onnxruntime_providers_webgpu ${onnxruntime_providers_webgpu_cc_srcs})
onnxruntime_add_include_to_target(onnxruntime_providers_webgpu
onnxruntime_common dawn::dawncpp_headers dawn::dawn_headers onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface)
target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_native dawn::dawn_proc)

set_target_properties(onnxruntime_providers_webgpu PROPERTIES FOLDER "ONNXRuntime")
1 change: 1 addition & 0 deletions cmake/onnxruntime_python.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ target_link_libraries(onnxruntime_pybind11_state PRIVATE
${PROVIDERS_ACL}
${PROVIDERS_ARMNN}
${PROVIDERS_XNNPACK}
${PROVIDERS_WEBGPU}
${PROVIDERS_AZURE}
${PROVIDERS_QNN}
onnxruntime_optimizer
Expand Down
12 changes: 12 additions & 0 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,10 @@ if(onnxruntime_USE_JSEP)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_js)
endif()

if(onnxruntime_USE_WEBGPU)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_webgpu)
endif()

if(onnxruntime_USE_RKNPU)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_rknpu)
endif()
Expand Down Expand Up @@ -603,6 +607,7 @@ set(ONNXRUNTIME_TEST_LIBS
${PROVIDERS_NNAPI}
${PROVIDERS_VSINPU}
${PROVIDERS_JS}
${PROVIDERS_WEBGPU}
${PROVIDERS_QNN}
${PROVIDERS_SNPE}
${PROVIDERS_RKNPU}
Expand Down Expand Up @@ -664,6 +669,13 @@ if(onnxruntime_USE_JSEP)
list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_js)
endif()

if(onnxruntime_USE_WEBGPU)
list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/webgpu/*)
list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_webgpu)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_webgpu)
list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_webgpu)
endif()

# QNN EP tests require CPU EP op implementations for accuracy evaluation, so disable on minimal
# or reduced op builds.
if(onnxruntime_USE_QNN AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD)
Expand Down
Loading
Loading