Skip to content

Commit

Permalink
Implements a custom kernel for RandomForestRegressor easier to optimi…
Browse files Browse the repository at this point in the history
…ze (#41)

* empty draft

* refactoring

* fix one more issue

* refactoring

* almost done

* documentation

* update compilation

* update setup, add file .def, update CI, cmake, fix build issues

* Update azure-pipelines.yml

* fix build

* adds a way to check a custom operator

* refactoring

* fix ut

* ut

* fix build

* add openmp

* update CI

* ci

* delay import

* delay imports

* update CI

* ci

* cuda

* update setup

* Update setup.py

* Update setup.py

* update

* ci

* update CI, remove duplicated version number

* Update azure-pipelines.yml

* essai

* Update test_ortcy.py

* fix unit tests

* fix one test

---------

Co-authored-by: xavier dupré <xavier.dupre@gmail.com>
  • Loading branch information
xadupre and sdpython authored Jul 7, 2023
1 parent 2a887d8 commit 6fe6bde
Show file tree
Hide file tree
Showing 83 changed files with 2,306 additions and 1,119 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
*.sln
*.cmake
*.whl
*.def
/*.png
/*.onnx
.build_path.txt
Expand Down
1 change: 1 addition & 0 deletions CHANGELOGS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Change Logs
0.2.0
+++++

* :pr:`41`: implements a custom kernel for RandomForestRegressor easier to optimize
* :pr:`34`: update to onnxruntime v1.15.1
* :pr:`31`: implement a custom CUDA kernel (gemm)
* :pr:`32`: update to onnxruntime v1.15.0
Expand Down
8 changes: 4 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ onnx-extended: extensions for onnx and onnxruntime
**onnx-extended** extends the list of supported operators in onnx
reference implementation, or implements faster versions in C++.
Documentation `onnx-extended
<http://www.xavierdupre.fr/app/onnx-extended/helpsphinx/index.html>`_.
<https://sdpython.github.io/doc/onnx-extended/>`_.
Source are available on `github/onnx-extended
<https://github.com/sdpython/onnx-extended>`_.

Expand Down Expand Up @@ -115,9 +115,9 @@ can be enabled with the following command:

::

python setup.py build_ext --inplace --enable_nvtx 1
# or
pip install -e . --config-settings="--enable_nvtx=1"
python setup.py build_ext --inplace --use_nvtx 1
# or (not working yet)
pip install -e . --config-settings="--use_nvtx=1"

Experimental cython binding for onnxruntime
+++++++++++++++++++++++++++++++++++++++++++
Expand Down
8 changes: 6 additions & 2 deletions _cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
cmake_minimum_required(VERSION 3.24.0)
project(onnx_extended VERSION 0.2.0)
project(onnx_extended VERSION ${ONNX_EXTENDED_VERSION})

#
# initialisation
#

message(STATUS "-------------------")
message(STATUS "ONNX_EXTENDED_VERSION=${ONNX_EXTENDED_VERSION}")
message(STATUS "CMAKE_VERSION=${CMAKE_VERSION}")
message(STATUS "CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}")
message(STATUS "CMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}")
Expand All @@ -25,6 +26,7 @@ message(STATUS "USE_CUDA=${USE_CUDA}")
message(STATUS "CUDA_BUILD=${CUDA_BUILD}")
message(STATUS "USE_NVTX=${USE_NVTX}")
message(STATUS "ORT_VERSION=${ORT_VERSION}")

# message(STATUS "ENV-PATH=$ENV{PATH}")
# message(STATUS "ENV-PYTHONPATH=$ENV{PYTHONPATH}")
message(STATUS "--------------------------------------------")
Expand All @@ -44,8 +46,8 @@ list(APPEND CMAKE_MODULE_PATH
# Packages and constants
#

include("load_externals.cmake")
include("constants.cmake")
include("load_externals.cmake")

#
# modules
Expand All @@ -61,8 +63,10 @@ include("targets/cuda_example_py.cmake")
include("targets/vector_function_cy.cmake")

set(ORTOPS_INCLUDE_DIR "${ROOT_INCLUDE_PATH}/onnx_extended/ortops")
set(REFOPS_INCLUDE_DIR "${ROOT_INCLUDE_PATH}/onnx_extended/reference/c_ops")
include("targets/ortops_tutorial_cpu.cmake")
include("targets/ortops_tutorial_cuda.cmake")
include("targets/ortops_optim_cpu.cmake")

#
# write version
Expand Down
6 changes: 3 additions & 3 deletions _cmake/clang_format.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ echo "--cython-lint--"
cython-lint .
echo "--clang-format--"
find onnx_extended -type f \( -name "*.h" -o -name "*.hpp" -o -name "*.cuh" -o -name "*.cpp" -o -name "*.cc" -o -name "*.cu" \) | while read f; do
echo "Processing '$f'";
clang-format --length 88 -i $f;
echo "clang-format -i $f";
clang-format -i $f;
done
echo "--cmake-lint--"
find _cmake -type f \( -name "*.cmake" -o -name "*.txt" \) | while read f; do
echo "Processing '$f'";
echo "cmake-lint $f --line-width=88 --disabled-codes C0103 C0113";
cmake-lint $f --line-width=88 --disabled-codes C0103 C0113;
done
14 changes: 14 additions & 0 deletions _cmake/constants.cmake
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
#
# python extension
#
# Select the shared-library file extension for the current platform.
# "so" is the fallback; APPLE switches it to "dylib" and MSVC, checked
# last so that it keeps priority over APPLE, switches it to "dll".
set(DLLEXT "so")
if(APPLE)
  set(DLLEXT "dylib")
endif()
if(MSVC)
  set(DLLEXT "dll")
endif()

#
# C++ 14 or C++ 17
#
Expand Down Expand Up @@ -37,6 +48,9 @@ else()
endif()

# On Apple platforms, export the compiler and flag environment variables that
# point at the LLVM toolchain under /usr/local/opt/llvm (used for OpenMP,
# per the status message below).
if(APPLE)
  message(STATUS "APPLE: set env var for open mp: CC, CXX, LDFLAGS, CPPFLAGS")
  set(ENV{CC} "/usr/local/opt/llvm/bin/clang")
  set(ENV{CXX} "/usr/local/opt/llvm/bin/clang++")
  # Environment variables are written with the ENV{NAME} brace syntax.
  # The previous ENV(NAME) spelling did not touch the environment at all,
  # so LDFLAGS and CPPFLAGS were never exported.
  set(ENV{LDFLAGS} "-L/usr/local/opt/llvm/lib")
  set(ENV{CPPFLAGS} "-I/usr/local/opt/llvm/include")
endif()
Expand Down
74 changes: 58 additions & 16 deletions _cmake/externals/FindCudaExtension.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@
# Defines USE_NVTX to enable profiling with NVIDIA profiler.
# CUDA_VERSION must be defined as well.

# Stop the configuration when the CUDA compiler is the system-wide
# /usr/bin/nvcc and ask the user to select a CUDA version explicitly.
# The variable is expanded inside quotes so that an empty or undefined
# CMAKE_CUDA_COMPILER compares as an empty string instead of producing a
# malformed if() call.
if("${CMAKE_CUDA_COMPILER}" STREQUAL "/usr/bin/nvcc")
  message(FATAL_ERROR
          "CMAKE_CUDA_COMPILER is equal to '${CMAKE_CUDA_COMPILER}', "
          "CUDA_VERSION=${CUDA_VERSION}, "
          "CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}, "
          "You should specify the cuda version by adding --cuda-version=...")
endif()

if(CUDA_VERSION)
find_package(CUDAToolkit ${CUDA_VERSION} EXACT)
else()
Expand All @@ -14,6 +22,27 @@ message(STATUS "CUDAToolkit_FOUND=${CUDAToolkit_FOUND}")

if(CUDAToolkit_FOUND)

message(STATUS "befor1 language CUDA_VERSION=${CUDA_VERSION}")
message(STATUS "befor1 language CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}")
message(STATUS "befor1 language CMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER}")

# Fallbacks applied before the CUDA language is enabled.
#
# 1. If CMAKE_CUDA_ARCHITECTURES is an empty string, use "native".
# NOTE(review): with an unquoted variable name, this test is presumably false
# when the variable is not defined at all (as opposed to defined but empty)
# - confirm that case is intended to be left untouched.
if(CMAKE_CUDA_ARCHITECTURES STREQUAL "")
set(CMAKE_CUDA_ARCHITECTURES "native")
endif()
# 2. If the CUDA compiler lookup ended with the NOTFOUND marker, either fail
# (no CUDA_VERSION to guess from) or point CMAKE_CUDA_COMPILER at the nvcc
# located under /usr/local/cuda-<CUDA_VERSION>/bin.
if(CMAKE_CUDA_COMPILER STREQUAL "CMAKE_CUDA_COMPILER-NOTFOUND")
# NOTE(review): same caveat as above for an undefined CUDA_VERSION - the else
# branch would then build the path "/usr/local/cuda-/bin/nvcc"; verify.
if(CUDA_VERSION STREQUAL "")
message(FATAL_ERROR "No CMAKE_CUDA_COMPILER for CUDA_VERSION=${CUDA_VERSION}. "
"You can use --cuda-version=<CUDA_VERSION> or set "
"CUDACXX=/usr/local/cuda-<CUDA_VERSION>/bin/nvcc")
else()
# The path is derived from CUDA_VERSION only; its existence is not checked here.
set(CMAKE_CUDA_COMPILER "/usr/local/cuda-${CUDA_VERSION}/bin/nvcc")
message(STATUS "set CMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER}")
endif()
endif()

message(STATUS "before language CUDA_VERSION=${CUDA_VERSION}")
message(STATUS "before language CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}")
message(STATUS "before language CMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER}")
enable_language(CUDA)
message(STATUS "------------- CUDA settings")
message(STATUS "CUDA_VERSION=${CUDA_VERSION}")
Expand All @@ -30,7 +59,7 @@ if(CUDAToolkit_FOUND)
"< ${CUDA_VERSION}, nvcc is not setup properly. "
"Try 'whereis nvcc' and chack the version.")
endif()

set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

Expand All @@ -42,11 +71,13 @@ if(CUDAToolkit_FOUND)

if(CUDA_BUILD STREQUAL "H100opt")

# see https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
# see https://arnon.dk/
# matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
set(CMAKE_CUDA_ARCHITECTURES 90)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_90,code=sm_90")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_90a,code=sm_90a")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_90a,code=compute_90a")
set(CMAKE_CUDA_FLAGS
"${CMAKE_CUDA_FLAGS} -gencode=arch=compute_90a,code=compute_90a")

else() # H100, DEFAULT

Expand All @@ -64,25 +95,36 @@ if(CUDAToolkit_FOUND)
endif()

if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11)
message(FATAL_ERROR "CUDA verions must be >= 11 but is ${CMAKE_CUDA_COMPILER_VERSION}.")
message(FATAL_ERROR "CUDA verions must be >= 11 but is "
"${CMAKE_CUDA_COMPILER_VERSION}.")
endif()
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12)
# 37, 50 still work in CUDA 11 but are marked deprecated and will be removed in future CUDA version.
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_37,code=sm_37") # K80
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_50,code=sm_50") # M series
# 37, 50 still work in CUDA 11
# but are marked deprecated and will be removed in future CUDA version.
# K80
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_37,code=sm_37")
# M series
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_50,code=sm_50")
endif()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_52,code=sm_52") # M60
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_60,code=sm_60") # P series
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_61,code=sm_61") # P series
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_70,code=sm_70") # V series
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_75,code=sm_75") # T series
# M60
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_52,code=sm_52")
# P series
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_60,code=sm_60")
# P series
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_61,code=sm_61")
# V series
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_70,code=sm_70")
# T series
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_75,code=sm_75")
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_80,code=sm_80") # A series
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_86,code=sm_86") # A series
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_87,code=sm_87") # A series
# A series
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_80,code=sm_80")
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_86,code=sm_86")
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_87,code=sm_87")
endif()
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.8)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_90,code=sm_90") # H series
# H series
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_90,code=sm_90")
endif()
endif()

Expand Down
6 changes: 0 additions & 6 deletions _cmake/externals/FindMyPython.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,6 @@ else()
message(STATUS "Use find_package(Python3).")
set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE})
if(APPLE)
message(STATUS "APPLE: set env var for open mp: CC, CCX, LDFLAGS, CPPFLAGS")
set(ENV{CC} "/usr/local/opt/llvm/bin/clang")
set(ENV{CXX} "/usr/local/opt/llvm/bin/clang++")
set(ENV{LDFLAGS} "-L/usr/local/opt/llvm/lib")
set(ENV{CPPFLAGS} "-I/usr/local/opt/llvm/include")

find_package(Python3 ${PYTHON_VERSION} COMPONENTS
Interpreter Development.Module
REQUIRED)
Expand Down
32 changes: 15 additions & 17 deletions _cmake/externals/FindOrt.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
# downloads onnxruntime as a binary
# functions ort_add_dependency, ort_add_custom_op

file(WRITE "../_setup_ext.txt" "")

if(NOT ORT_VERSION)
set(ORT_VERSION 1.15.1)
set(ORT_VERSION_INT 1150)
Expand Down Expand Up @@ -58,14 +60,6 @@ else()
set(ORT_URL ${ORT_VERSION})
endif()

if(MSVC)
set(DLLEXT "dll")
elseif(APPLE)
set(DLLEXT "dylib")
else()
set(DLLEXT "so")
endif()

find_library(ONNXRUNTIME onnxruntime HINTS "${ONNXRUNTIME_LIB_DIR}")
if(ONNXRUNTIME-NOTFOUND)
message(FATAL_ERROR "onnxruntime cannot be found at '${ONNXRUNTIME_LIB_DIR}'")
Expand Down Expand Up @@ -96,26 +90,27 @@ endif()
#
function(ort_add_dependency name folder_copy)
get_target_property(target_output_directory ${name} BINARY_DIR)
message(STATUS "ort copy ${ORT_LIB_FILES_LENGTH} files from '${ONNXRUNTIME_LIB_DIR}'")
message(STATUS "ort: copy-1 ${ORT_LIB_FILES_LENGTH} files from '${ONNXRUNTIME_LIB_DIR}'")
if(MSVC)
set(destination_dir ${target_output_directory}/${CMAKE_BUILD_TYPE})
else()
set(destination_dir ${target_output_directory})
endif()
message(STATUS "ort copy to '${destination_dir}'")
message(STATUS "ort: copy-2 to '${destination_dir}'")
if(folder_copy)
message(STATUS "ort copy to '${folder_copy}'")
message(STATUS "ort: copy-3 to '${folder_copy}'")
endif()
foreach(file_i ${ORT_LIB_FILES})
if(NOT EXISTS ${destination_dir}/${file_i})
message(STATUS "ort copy '${file_i}' to '${destination_dir}'")
message(STATUS "ort: copy-4 '${file_i}' to '${destination_dir}'")
add_custom_command(
TARGET ${name} POST_BUILD
COMMAND ${CMAKE_COMMAND} ARGS -E copy ${file_i} ${destination_dir})
endif()
if(folder_copy)
if(NOT EXISTS ${folder_copy}/${file_i})
message(STATUS "ort copy '${file_i}' to '${folder_copy}'")
message(STATUS "ort: copy-5 '${file_i}' to '${folder_copy}'")
# file(APPEND "../_setup_ext.txt" "copy,${file_i},${folder_copy}\n")
add_custom_command(
TARGET ${name} POST_BUILD
COMMAND ${CMAKE_COMMAND} ARGS -E copy ${file_i} ${folder_copy})
Expand All @@ -125,8 +120,6 @@ function(ort_add_dependency name folder_copy)
# file(COPY ${ORT_LIB_FILES} DESTINATION ${target_output_directory})
endfunction()

file(WRITE "../_setup_ext.txt" "")

#
#! ort_add_custom_op : compile a pyx file into cpp
#
Expand All @@ -136,8 +129,13 @@ file(WRITE "../_setup_ext.txt" "")
# \argn: C++ file to compile
#
function(ort_add_custom_op name provider folder)
# On Windows, generate a module-definition file next to the sources.
# It names the library "<name>.dll" and exports the symbol RegisterCustomOps
# with ordinal 1.
if (WIN32)
file(WRITE "${folder}/${name}.def" "LIBRARY "
"\"${name}.dll\"\nEXPORTS\n RegisterCustomOps @1")
# Append the generated .def file to ARGN so that it is handed to the
# add_library(${name} SHARED ${ARGN}) calls below together with the sources.
list(APPEND ARGN "${folder}/${name}.def")
endif()
if (provider STREQUAL "CUDA")
message(STATUS "ort custom op ${provider}: '${name}': ${ARGN}")
message(STATUS "ort: custom op ${provider}: '${name}': ${ARGN}")
add_library(${name} SHARED ${ARGN})

# add property --use_fast_math to cu files
Expand Down Expand Up @@ -173,7 +171,7 @@ function(ort_add_custom_op name provider folder)
PRIVATE
${ONNXRUNTIME_INCLUDE_DIR})
else()
message(STATUS "ort custom op CPU: '${name}': ${ARGN}")
message(STATUS "ort: custom op CPU: '${name}': ${ARGN}")
add_library(${name} SHARED ${ARGN})
target_include_directories(${name} PRIVATE ${ONNXRUNTIME_INCLUDE_DIR})
target_compile_definitions(${name} PRIVATE ORT_VERSION=${ORT_VERSION_INT})
Expand Down
3 changes: 2 additions & 1 deletion _cmake/load_externals.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ message(STATUS "-------------------")
if(CUDA_AVAILABLE)
set(
config_content
"HAS_CUDA = 1\nCUDA_VERSION = '${CUDA_VERSION}'\nCUDA_VERSION_INT = ${CUDA_VERSION_INT}")
"HAS_CUDA = 1\nCUDA_VERSION = '${CUDA_VERSION}'"
"\nCUDA_VERSION_INT = ${CUDA_VERSION_INT}")
else()
set(config_content "HAS_CUDA = 0")
endif()
14 changes: 13 additions & 1 deletion _cmake/targets/c_op_conv_.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,21 @@ local_pybind11_add_module(
../onnx_extended/reference/c_ops/cpu/c_op_conv_.cpp)
eigen_add_dependency(c_op_conv_)

target_include_directories(
c_op_conv_
PRIVATE
${ROOT_INCLUDE_PATH}/onnx_extended)

add_executable(test_c_op_conv_cpp
../_unittests/ut_reference/test_c_op_conv.cpp
../onnx_extended/reference/c_ops/cpu/c_op_common.cpp)
target_include_directories(test_c_op_conv_cpp PRIVATE ${ROOT_INCLUDE_PATH})

target_include_directories(
test_c_op_conv_cpp
PRIVATE
${ROOT_INCLUDE_PATH}
${ROOT_INCLUDE_PATH}/onnx_extended)

eigen_add_dependency(test_c_op_conv_cpp)

add_test(NAME test_c_op_conv_cpp COMMAND test_c_op_conv_cpp)
4 changes: 4 additions & 0 deletions _cmake/targets/c_op_tree_ensemble_py_.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,7 @@ local_pybind11_add_module(
../onnx_extended/reference/c_ops/cpu/c_op_common.cpp
../onnx_extended/reference/c_ops/cpu/c_op_tree_ensemble_py_.cpp)

target_include_directories(
c_op_tree_ensemble_py_
PRIVATE
${ROOT_INCLUDE_PATH}/onnx_extended)
Loading

0 comments on commit 6fe6bde

Please sign in to comment.