Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements a custom kernel for RandomForestRegressor easier to optimize #41

Merged
merged 34 commits into from
Jul 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
*.sln
*.cmake
*.whl
*.def
/*.png
/*.onnx
.build_path.txt
Expand Down
1 change: 1 addition & 0 deletions CHANGELOGS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Change Logs
0.2.0
+++++

* :pr:`41`: implements a custom kernel for RandomForestRegressor easier to optimize
* :pr:`34`: update to onnxruntime v1.15.1
* :pr:`31`: implement a custom CUDA kernel (gemm)
* :pr:`32`: update to onnxruntime v1.15.0
Expand Down
8 changes: 4 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ onnx-extended: extensions for onnx and onnxruntime
**onnx-extended** extends the list of supported operators in onnx
reference implementation, or implements faster versions in C++.
Documentation `onnx-extended
<http://www.xavierdupre.fr/app/onnx-extended/helpsphinx/index.html>`_.
<https://sdpython.github.io/doc/onnx-extended/>`_.
Sources are available on `github/onnx-extended
<https://github.com/sdpython/onnx-extended>`_.

Expand Down Expand Up @@ -115,9 +115,9 @@ can be enabled with the following command:

::

python setup.py build_ext --inplace --enable_nvtx 1
# or
pip install -e . --config-settings="--enable_nvtx=1"
python setup.py build_ext --inplace --use_nvtx 1
# or (not working yet)
pip install -e . --config-settings="--use_nvtx=1"

Experimental cython binding for onnxruntime
+++++++++++++++++++++++++++++++++++++++++++
Expand Down
8 changes: 6 additions & 2 deletions _cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
cmake_minimum_required(VERSION 3.24.0)
project(onnx_extended VERSION 0.2.0)
project(onnx_extended VERSION ${ONNX_EXTENDED_VERSION})

#
# initialisation
#

message(STATUS "-------------------")
message(STATUS "ONNX_EXTENDED_VERSION=${ONNX_EXTENDED_VERSION}")
message(STATUS "CMAKE_VERSION=${CMAKE_VERSION}")
message(STATUS "CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}")
message(STATUS "CMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}")
Expand All @@ -25,6 +26,7 @@ message(STATUS "USE_CUDA=${USE_CUDA}")
message(STATUS "CUDA_BUILD=${CUDA_BUILD}")
message(STATUS "USE_NVTX=${USE_NVTX}")
message(STATUS "ORT_VERSION=${ORT_VERSION}")

# message(STATUS "ENV-PATH=$ENV{PATH}")
# message(STATUS "ENV-PYTHONPATH=$ENV{PYTHONPATH}")
message(STATUS "--------------------------------------------")
Expand All @@ -44,8 +46,8 @@ list(APPEND CMAKE_MODULE_PATH
# Packages and constants
#

include("load_externals.cmake")
include("constants.cmake")
include("load_externals.cmake")

#
# modules
Expand All @@ -61,8 +63,10 @@ include("targets/cuda_example_py.cmake")
include("targets/vector_function_cy.cmake")

set(ORTOPS_INCLUDE_DIR "${ROOT_INCLUDE_PATH}/onnx_extended/ortops")
set(REFOPS_INCLUDE_DIR "${ROOT_INCLUDE_PATH}/onnx_extended/reference/c_ops")
include("targets/ortops_tutorial_cpu.cmake")
include("targets/ortops_tutorial_cuda.cmake")
include("targets/ortops_optim_cpu.cmake")

#
# write version
Expand Down
6 changes: 3 additions & 3 deletions _cmake/clang_format.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ echo "--cython-lint--"
cython-lint .
echo "--clang-format--"
find onnx_extended -type f \( -name "*.h" -o -name "*.hpp" -o -name "*.cuh" -o -name "*.cpp" -o -name "*.cc" -o -name "*.cu" \) | while read f; do
echo "Processing '$f'";
clang-format --length 88 -i $f;
echo "clang-format -i $f";
clang-format -i $f;
done
echo "--cmake-lint--"
find _cmake -type f \( -name "*.cmake" -o -name "*.txt" \) | while read f; do
echo "Processing '$f'";
echo "cmake-lint $f --line-width=88 --disabled-codes C0103 C0113";
cmake-lint $f --line-width=88 --disabled-codes C0103 C0113;
done
14 changes: 14 additions & 0 deletions _cmake/constants.cmake
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
#
# python extension
#
# DLLEXT holds the file extension of shared libraries on the current
# platform: "dll" with MSVC, "dylib" on Apple systems, "so" otherwise.
# Start from the generic value and override it for the special cases.
set(DLLEXT "so")
if(MSVC)
  set(DLLEXT "dll")
elseif(APPLE)
  set(DLLEXT "dylib")
endif()

#
# C++ 14 or C++ 17
#
Expand Down Expand Up @@ -37,6 +48,9 @@ else()
endif()

if(APPLE)
  # Point the environment at the LLVM toolchain located under
  # /usr/local/opt/llvm so that OpenMP can be used (see the status message).
  # Environment variables must be written with the ENV{NAME} syntax (curly
  # braces); with parentheses, set() creates a normal variable named ENV
  # and the environment is left untouched.
  message(STATUS "APPLE: set env var for open mp: CC, CXX, LDFLAGS, CPPFLAGS")
  set(ENV{CC} "/usr/local/opt/llvm/bin/clang")
  set(ENV{CXX} "/usr/local/opt/llvm/bin/clang++")
  set(ENV{LDFLAGS} "-L/usr/local/opt/llvm/lib")
  set(ENV{CPPFLAGS} "-I/usr/local/opt/llvm/include")
endif()
Expand Down
74 changes: 58 additions & 16 deletions _cmake/externals/FindCudaExtension.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@
# Defines USE_NVTX to enable profiling with NVIDIA profiler.
# CUDA_VERSION must be defined as well.

# Abort when the selected CUDA compiler is /usr/bin/nvcc and ask the user to
# choose a CUDA version explicitly. The expansion is quoted so that the
# comparison stays well-formed when CMAKE_CUDA_COMPILER is empty or undefined
# (an unquoted empty expansion would leave if() without a left operand).
if("${CMAKE_CUDA_COMPILER}" STREQUAL "/usr/bin/nvcc")
  message(FATAL_ERROR
          "CMAKE_CUDA_COMPILER is equal to '${CMAKE_CUDA_COMPILER}', "
          "CUDA_VERSION=${CUDA_VERSION}, "
          "CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}, "
          "You should specify the cuda version by adding --cuda-version=...")
endif()

if(CUDA_VERSION)
find_package(CUDAToolkit ${CUDA_VERSION} EXACT)
else()
Expand All @@ -14,6 +22,27 @@ message(STATUS "CUDAToolkit_FOUND=${CUDAToolkit_FOUND}")

if(CUDAToolkit_FOUND)

message(STATUS "befor1 language CUDA_VERSION=${CUDA_VERSION}")
message(STATUS "befor1 language CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}")
message(STATUS "befor1 language CMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER}")

# Fall back to "native" when no CUDA architecture was requested.
# The expansion is quoted on purpose: a bare, undefined variable name is
# compared as the literal text of its name, which never equals "", so the
# default would be skipped when the variable is not defined at all.
if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
  set(CMAKE_CUDA_ARCHITECTURES "native")
endif()
# When the CUDA compiler lookup failed, derive the nvcc path from
# CUDA_VERSION, or stop with instructions when no version is known.
# Both expansions are quoted so that an undefined CUDA_VERSION is treated as
# empty (and reported) instead of producing the path
# "/usr/local/cuda-/bin/nvcc".
if("${CMAKE_CUDA_COMPILER}" STREQUAL "CMAKE_CUDA_COMPILER-NOTFOUND")
  if("${CUDA_VERSION}" STREQUAL "")
    message(FATAL_ERROR "No CMAKE_CUDA_COMPILER for CUDA_VERSION=${CUDA_VERSION}. "
                        "You can use --cuda-version=<CUDA_VERSION> or set "
                        "CUDACXX=/usr/local/cuda-<CUDA_VERSION>/bin/nvcc")
  else()
    set(CMAKE_CUDA_COMPILER "/usr/local/cuda-${CUDA_VERSION}/bin/nvcc")
    message(STATUS "set CMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER}")
  endif()
endif()

message(STATUS "before language CUDA_VERSION=${CUDA_VERSION}")
message(STATUS "before language CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}")
message(STATUS "before language CMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER}")
enable_language(CUDA)
message(STATUS "------------- CUDA settings")
message(STATUS "CUDA_VERSION=${CUDA_VERSION}")
Expand All @@ -30,7 +59,7 @@ if(CUDAToolkit_FOUND)
"< ${CUDA_VERSION}, nvcc is not setup properly. "
"Try 'whereis nvcc' and chack the version.")
endif()

set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

Expand All @@ -42,11 +71,13 @@ if(CUDAToolkit_FOUND)

if(CUDA_BUILD STREQUAL "H100opt")

# see https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
# see https://arnon.dk/
# matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
set(CMAKE_CUDA_ARCHITECTURES 90)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_90,code=sm_90")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_90a,code=sm_90a")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_90a,code=compute_90a")
set(CMAKE_CUDA_FLAGS
"${CMAKE_CUDA_FLAGS} -gencode=arch=compute_90a,code=compute_90a")

else() # H100, DEFAULT

Expand All @@ -64,25 +95,36 @@ if(CUDAToolkit_FOUND)
endif()

if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11)
message(FATAL_ERROR "CUDA verions must be >= 11 but is ${CMAKE_CUDA_COMPILER_VERSION}.")
message(FATAL_ERROR "CUDA verions must be >= 11 but is "
"${CMAKE_CUDA_COMPILER_VERSION}.")
endif()
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12)
# 37, 50 still work in CUDA 11 but are marked deprecated and will be removed in future CUDA version.
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_37,code=sm_37") # K80
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_50,code=sm_50") # M series
# 37, 50 still work in CUDA 11
# but are marked deprecated and will be removed in a future CUDA version.
# K80
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_37,code=sm_37")
# M series
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_50,code=sm_50")
endif()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_52,code=sm_52") # M60
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_60,code=sm_60") # P series
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_61,code=sm_61") # P series
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_70,code=sm_70") # V series
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_75,code=sm_75") # T series
# M60
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_52,code=sm_52")
# P series
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_60,code=sm_60")
# P series
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_61,code=sm_61")
# V series
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_70,code=sm_70")
# T series
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_75,code=sm_75")
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_80,code=sm_80") # A series
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_86,code=sm_86") # A series
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_87,code=sm_87") # A series
# A series
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_80,code=sm_80")
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_86,code=sm_86")
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_87,code=sm_87")
endif()
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.8)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_90,code=sm_90") # H series
# H series
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_90,code=sm_90")
endif()
endif()

Expand Down
6 changes: 0 additions & 6 deletions _cmake/externals/FindMyPython.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,6 @@ else()
message(STATUS "Use find_package(Python3).")
set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE})
if(APPLE)
message(STATUS "APPLE: set env var for open mp: CC, CCX, LDFLAGS, CPPFLAGS")
set(ENV{CC} "/usr/local/opt/llvm/bin/clang")
set(ENV{CXX} "/usr/local/opt/llvm/bin/clang++")
set(ENV{LDFLAGS} "-L/usr/local/opt/llvm/lib")
set(ENV{CPPFLAGS} "-I/usr/local/opt/llvm/include")

find_package(Python3 ${PYTHON_VERSION} COMPONENTS
Interpreter Development.Module
REQUIRED)
Expand Down
32 changes: 15 additions & 17 deletions _cmake/externals/FindOrt.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
# downloads onnxruntime as a binary
# functions ort_add_dependency, ort_add_custom_op

file(WRITE "../_setup_ext.txt" "")

if(NOT ORT_VERSION)
set(ORT_VERSION 1.15.1)
set(ORT_VERSION_INT 1150)
Expand Down Expand Up @@ -58,14 +60,6 @@ else()
set(ORT_URL ${ORT_VERSION})
endif()

if(MSVC)
set(DLLEXT "dll")
elseif(APPLE)
set(DLLEXT "dylib")
else()
set(DLLEXT "so")
endif()

find_library(ONNXRUNTIME onnxruntime HINTS "${ONNXRUNTIME_LIB_DIR}")
if(ONNXRUNTIME-NOTFOUND)
message(FATAL_ERROR "onnxruntime cannot be found at '${ONNXRUNTIME_LIB_DIR}'")
Expand Down Expand Up @@ -96,26 +90,27 @@ endif()
#
function(ort_add_dependency name folder_copy)
get_target_property(target_output_directory ${name} BINARY_DIR)
message(STATUS "ort copy ${ORT_LIB_FILES_LENGTH} files from '${ONNXRUNTIME_LIB_DIR}'")
message(STATUS "ort: copy-1 ${ORT_LIB_FILES_LENGTH} files from '${ONNXRUNTIME_LIB_DIR}'")
if(MSVC)
set(destination_dir ${target_output_directory}/${CMAKE_BUILD_TYPE})
else()
set(destination_dir ${target_output_directory})
endif()
message(STATUS "ort copy to '${destination_dir}'")
message(STATUS "ort: copy-2 to '${destination_dir}'")
if(folder_copy)
message(STATUS "ort copy to '${folder_copy}'")
message(STATUS "ort: copy-3 to '${folder_copy}'")
endif()
foreach(file_i ${ORT_LIB_FILES})
if(NOT EXISTS ${destination_dir}/${file_i})
message(STATUS "ort copy '${file_i}' to '${destination_dir}'")
message(STATUS "ort: copy-4 '${file_i}' to '${destination_dir}'")
add_custom_command(
TARGET ${name} POST_BUILD
COMMAND ${CMAKE_COMMAND} ARGS -E copy ${file_i} ${destination_dir})
endif()
if(folder_copy)
if(NOT EXISTS ${folder_copy}/${file_i})
message(STATUS "ort copy '${file_i}' to '${folder_copy}'")
message(STATUS "ort: copy-5 '${file_i}' to '${folder_copy}'")
# file(APPEND "../_setup_ext.txt" "copy,${file_i},${folder_copy}\n")
add_custom_command(
TARGET ${name} POST_BUILD
COMMAND ${CMAKE_COMMAND} ARGS -E copy ${file_i} ${folder_copy})
Expand All @@ -125,8 +120,6 @@ function(ort_add_dependency name folder_copy)
# file(COPY ${ORT_LIB_FILES} DESTINATION ${target_output_directory})
endfunction()

file(WRITE "../_setup_ext.txt" "")

#
#! ort_add_custom_op : builds a shared library of onnxruntime custom operators
#
Expand All @@ -136,8 +129,13 @@ file(WRITE "../_setup_ext.txt" "")
# \argn: C++ file to compile
#
function(ort_add_custom_op name provider folder)
if(WIN32)
  # Generate a module-definition file next to the sources that exports
  # RegisterCustomOps (ordinal 1) from <name>.dll, then add that file to the
  # list of sources received through ARGN.
  set(def_file "${folder}/${name}.def")
  file(WRITE "${def_file}"
       "LIBRARY \"${name}.dll\"\nEXPORTS\n RegisterCustomOps @1")
  list(APPEND ARGN "${def_file}")
endif()
if (provider STREQUAL "CUDA")
message(STATUS "ort custom op ${provider}: '${name}': ${ARGN}")
message(STATUS "ort: custom op ${provider}: '${name}': ${ARGN}")
add_library(${name} SHARED ${ARGN})

# add property --use_fast_math to cu files
Expand Down Expand Up @@ -173,7 +171,7 @@ function(ort_add_custom_op name provider folder)
PRIVATE
${ONNXRUNTIME_INCLUDE_DIR})
else()
message(STATUS "ort custom op CPU: '${name}': ${ARGN}")
message(STATUS "ort: custom op CPU: '${name}': ${ARGN}")
add_library(${name} SHARED ${ARGN})
target_include_directories(${name} PRIVATE ${ONNXRUNTIME_INCLUDE_DIR})
target_compile_definitions(${name} PRIVATE ORT_VERSION=${ORT_VERSION_INT})
Expand Down
3 changes: 2 additions & 1 deletion _cmake/load_externals.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ message(STATUS "-------------------")
# Text describing CUDA availability, stored in config_content
# (presumably written to a generated configuration file further down;
# the destination is not visible here, TODO confirm).
if(CUDA_AVAILABLE)
  # string(CONCAT) joins the pieces into one single string. Passing several
  # values to set() would build a list instead and insert a ';' between
  # the CUDA_VERSION line and the CUDA_VERSION_INT line.
  string(
    CONCAT config_content
           "HAS_CUDA = 1\nCUDA_VERSION = '${CUDA_VERSION}'"
           "\nCUDA_VERSION_INT = ${CUDA_VERSION_INT}")
else()
  set(config_content "HAS_CUDA = 0")
endif()
14 changes: 13 additions & 1 deletion _cmake/targets/c_op_conv_.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,21 @@ local_pybind11_add_module(
../onnx_extended/reference/c_ops/cpu/c_op_conv_.cpp)
eigen_add_dependency(c_op_conv_)

target_include_directories(
c_op_conv_
PRIVATE
${ROOT_INCLUDE_PATH}/onnx_extended)

add_executable(test_c_op_conv_cpp
../_unittests/ut_reference/test_c_op_conv.cpp
../onnx_extended/reference/c_ops/cpu/c_op_common.cpp)
target_include_directories(test_c_op_conv_cpp PRIVATE ${ROOT_INCLUDE_PATH})

target_include_directories(
test_c_op_conv_cpp
PRIVATE
${ROOT_INCLUDE_PATH}
${ROOT_INCLUDE_PATH}/onnx_extended)

eigen_add_dependency(test_c_op_conv_cpp)

add_test(NAME test_c_op_conv_cpp COMMAND test_c_op_conv_cpp)
4 changes: 4 additions & 0 deletions _cmake/targets/c_op_tree_ensemble_py_.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,7 @@ local_pybind11_add_module(
../onnx_extended/reference/c_ops/cpu/c_op_common.cpp
../onnx_extended/reference/c_ops/cpu/c_op_tree_ensemble_py_.cpp)

target_include_directories(
c_op_tree_ensemble_py_
PRIVATE
${ROOT_INCLUDE_PATH}/onnx_extended)
Loading