Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/microsoft/onnxruntime into …
Browse files Browse the repository at this point in the history
…qgemmexc
  • Loading branch information
xadupre committed Nov 2, 2023
2 parents eeb4c77 + 178f7ca commit e7e8725
Show file tree
Hide file tree
Showing 108 changed files with 3,599 additions and 1,197 deletions.
12 changes: 1 addition & 11 deletions cgmanifests/generated/cgmanifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "0c296085f9f65f0f8ef7aec7b9eed55faf37dc40",
"commitHash": "b86cc54efce19530fb953e4b21f57e6b3888534c",
"repositoryUrl": "https://github.com/onnx/onnx.git"
},
"comments": "git submodule at cmake/external/onnx"
Expand Down Expand Up @@ -192,16 +192,6 @@
"comments": "mp11"
}
},
{
"component": {
"type": "git",
"git": {
"commitHash": "6a20ba82b439ea1fd650da4d389e96b60a1dd828",
"repositoryUrl": "https://github.com/onnx/onnx.git"
},
"comments": "onnx"
}
},
{
"component": {
"type": "git",
Expand Down
18 changes: 0 additions & 18 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1282,14 +1282,6 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
endif()

if (onnxruntime_USE_OPENVINO_VPUX_FP16)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
endif()

if (onnxruntime_USE_OPENVINO_VPUX_U8)
add_definitions(-DOPENVINO_CONFIG_VPUX_U8=1)
endif()

if (onnxruntime_USE_OPENVINO_GPU_FP32_NP)
add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
Expand All @@ -1310,16 +1302,6 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_VPUX_FP32_NP)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP32=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_VPUX_FP16_NP)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_HETERO)
add_definitions(-DOPENVINO_CONFIG_HETERO=1)
add_definitions(-DDEVICE_NAME="${onnxruntime_USE_OPENVINO_DEVICE}")
Expand Down
2 changes: 1 addition & 1 deletion cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf36
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41
mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063
onnx;https://github.com/onnx/onnx/archive/6a20ba82b439ea1fd650da4d389e96b60a1dd828.zip;179a22ad4cd67109c60031ae4b6cf2f434d8bd7e
onnx;https://github.com/onnx/onnx/archive/refs/tags/v1.15.0.zip;54c3f960a0541c5d8d3e60c2933e11f5d3688a11
#use the commit of supporting all the plugins and TRT 8.6-GA (https://github.com/onnx/onnx-tensorrt/commit/0462dc31ae78f48744b6141ae376df1f96d3f459)
onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/a43ce67187bab219520fd80f21af8bbd4354bc8c.zip;572535aefef477050f86744dfab1fef840198035
protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/onnx
1 change: 1 addition & 0 deletions cmake/onnxruntime_providers_cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_slice.cc"
"${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_reshape.cc"
"${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_expand.cc"
"${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_reduce.cc"
)
endif()
# add using ONNXRUNTIME_ROOT so they show up under the 'contrib_ops' folder in Visual Studio
Expand Down
6 changes: 6 additions & 0 deletions cmake/onnxruntime_rocm_hipify.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ set(contrib_ops_excluded_files
"cuda_contrib_kernels.h"
"inverse.cc"
"fused_conv.cc"
"bert/group_query_attention_helper.h"
"bert/group_query_attention.h"
"bert/group_query_attention.cc"
"bert/group_query_attention_impl.h"
"bert/group_query_attention_impl.cu"
)

if (NOT onnxruntime_ENABLE_ATEN)
Expand All @@ -109,6 +114,7 @@ if (NOT onnxruntime_USE_NCCL)
list(APPEND contrib_ops_excluded_files "collective/distributed_slice.cc")
list(APPEND contrib_ops_excluded_files "collective/distributed_reshape.cc")
list(APPEND contrib_ops_excluded_files "collective/distributed_expand.cc")
list(APPEND contrib_ops_excluded_files "collective/distributed_reduce.cc")
endif()

set(provider_excluded_files
Expand Down
6 changes: 3 additions & 3 deletions docs/ContribOperators.md
Original file line number Diff line number Diff line change
Expand Up @@ -2422,14 +2422,14 @@ This version of the operator has been available since version 1 of the 'com.micr
<dd>When buffered past_key and past_value is used (present_key uses same tensor as past_key), requiredto specify past_sequence_length (could be 0). Otherwise, past_sequence_length inferred from past_key.</dd>
</dl>

#### Outputs (1 - 3)
#### Outputs

<dl>
<dt><tt>output</tt> : T</dt>
<dd>3D output tensor with shape (batch_size, sequence_length, hidden_size)</dd>
<dt><tt>present_key</tt> (optional) : T</dt>
<dt><tt>present_key</tt> : T</dt>
<dd>present state key with support for format BSNH or BNSH. When past_key uses same tensor as present_key(k-v buffer), it is of length max_sequence_length... otherwise of length past_sequence_length +kv_sequence_length.</dd>
<dt><tt>present_value</tt> (optional) : T</dt>
<dt><tt>present_value</tt> : T</dt>
<dd>present state value with support for format BSNH or BNSH. When past_value uses same tensor as present_value(k-v buffer), it is of length max_sequence_length... otherwise of length past_sequence_length +kv_sequence_length.</dd>
</dl>

Expand Down
2 changes: 0 additions & 2 deletions docs/python/ReadMeOV.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
- Intel® CPUs
- Intel® integrated GPUs
- Intel® discrete GPUs
- Intel® integrated VPUs

Installation
------------
Expand All @@ -22,7 +21,6 @@ This package supports:
- Intel® CPUs
- Intel® integrated GPUs
- Intel® discrete GPUs
- Intel® integrated VPUs

``pip3 install onnxruntime-openvino``

Expand Down
8 changes: 6 additions & 2 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@ typedef struct OrtMIGraphXProviderOptions {
typedef struct OrtOpenVINOProviderOptions {
#ifdef __cplusplus
OrtOpenVINOProviderOptions() : device_type{},
enable_vpu_fast_compile{},
enable_npu_fast_compile{},
device_id{},
num_of_threads{},
cache_dir{},
Expand All @@ -624,7 +624,7 @@ typedef struct OrtOpenVINOProviderOptions {
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
*/
const char* device_type;
unsigned char enable_vpu_fast_compile; ///< 0 = disabled, nonzero = enabled
unsigned char enable_npu_fast_compile; ///< 0 = disabled, nonzero = enabled
const char* device_id;
size_t num_of_threads; ///< 0 = Use default number of threads
const char* cache_dir; // path is set to empty by default
Expand Down Expand Up @@ -4605,6 +4605,10 @@ struct OrtCustomOp {
OrtStatusPtr(ORT_API_CALL* KernelComputeV2)(_In_ void* op_kernel, _In_ OrtKernelContext* context);

OrtStatusPtr(ORT_API_CALL* InferOutputShapeFn)(_In_ const struct OrtCustomOp* op, _In_ OrtShapeInferContext*);

// Get start range
int(ORT_API_CALL* GetStartVersion)(_In_ const struct OrtCustomOp* op);
int(ORT_API_CALL* GetEndVersion)(_In_ const struct OrtCustomOp* op);
};

/*
Expand Down
13 changes: 13 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -2228,6 +2228,8 @@ struct ShapeInferContext {

using ShapeInferFn = Ort::Status (*)(Ort::ShapeInferContext&);

#define MAX_CUSTOM_OP_END_VER (1UL << 31) - 1

template <typename TOp, typename TKernel, bool WithStatus = false>
struct CustomOpBase : OrtCustomOp {
CustomOpBase() {
Expand Down Expand Up @@ -2280,6 +2282,14 @@ struct CustomOpBase : OrtCustomOp {
}

SetShapeInferFn<TOp>(0);

OrtCustomOp::GetStartVersion = [](const OrtCustomOp* this_) {
return static_cast<const TOp*>(this_)->start_ver_;
};

OrtCustomOp::GetEndVersion = [](const OrtCustomOp* this_) {
return static_cast<const TOp*>(this_)->end_ver_;
};
}

// Default implementation of GetExecutionProviderType that returns nullptr to default to the CPU provider
Expand Down Expand Up @@ -2348,6 +2358,9 @@ struct CustomOpBase : OrtCustomOp {
protected:
// Helper function that returns a map of session config entries specified by CustomOpBase::GetSessionConfigKeys.
void GetSessionConfigs(std::unordered_map<std::string, std::string>& out, ConstSessionOptions options) const;

int start_ver_ = 1;
int end_ver_ = MAX_CUSTOM_OP_END_VER;
};

} // namespace Ort
Expand Down
59 changes: 43 additions & 16 deletions include/onnxruntime/core/session/onnxruntime_lite_custom_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -773,8 +773,11 @@ struct OrtLiteCustomOp : public OrtCustomOp {
PARSE_ARGS(Ort::Float8E5M2FNUZ_t, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2FNUZ)

OrtLiteCustomOp(const char* op_name,
const char* execution_provider) : op_name_(op_name),
execution_provider_(execution_provider) {
const char* execution_provider,
int start_ver = 1, int end_ver = MAX_CUSTOM_OP_END_VER) : op_name_(op_name),
execution_provider_(execution_provider),
start_ver_(start_ver),
end_ver_(end_ver) {
OrtCustomOp::version = ORT_API_VERSION;

OrtCustomOp::GetName = [](const OrtCustomOp* op) { return static_cast<const OrtLiteCustomOp*>(op)->op_name_.c_str(); };
Expand Down Expand Up @@ -837,13 +840,26 @@ struct OrtLiteCustomOp : public OrtCustomOp {
OrtCustomOp::KernelCompute = {};

OrtCustomOp::InferOutputShapeFn = {};

OrtCustomOp::GetStartVersion = [](const OrtCustomOp* op) {
auto self = reinterpret_cast<const OrtLiteCustomOp*>(op);
return self->start_ver_;
};

OrtCustomOp::GetEndVersion = [](const OrtCustomOp* op) {
auto self = reinterpret_cast<const OrtLiteCustomOp*>(op);
return self->end_ver_;
};
}

const std::string op_name_;
const std::string execution_provider_;

std::vector<ONNXTensorElementDataType> input_types_;
std::vector<ONNXTensorElementDataType> output_types_;

int start_ver_ = 1;
int end_ver_ = MAX_CUSTOM_OP_END_VER;
};

//////////////////////////// OrtLiteCustomFunc ////////////////////////////////
Expand Down Expand Up @@ -873,9 +889,11 @@ struct OrtLiteCustomFunc : public OrtLiteCustomOp {
OrtLiteCustomFunc(const char* op_name,
const char* execution_provider,
ComputeFn compute_fn,
ShapeInferFn shape_infer_fn = {}) : OrtLiteCustomOp(op_name, execution_provider),
compute_fn_(compute_fn),
shape_infer_fn_(shape_infer_fn) {
ShapeInferFn shape_infer_fn = {},
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, start_ver, end_ver),
compute_fn_(compute_fn),
shape_infer_fn_(shape_infer_fn) {
ParseArgs<Args...>(input_types_, output_types_);

OrtCustomOp::KernelCompute = [](void* op_kernel, OrtKernelContext* context) {
Expand Down Expand Up @@ -911,9 +929,11 @@ struct OrtLiteCustomFunc : public OrtLiteCustomOp {
OrtLiteCustomFunc(const char* op_name,
const char* execution_provider,
ComputeFnReturnStatus compute_fn_return_status,
ShapeInferFn shape_infer_fn = {}) : OrtLiteCustomOp(op_name, execution_provider),
compute_fn_return_status_(compute_fn_return_status),
shape_infer_fn_(shape_infer_fn) {
ShapeInferFn shape_infer_fn = {},
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, start_ver, end_ver),
compute_fn_return_status_(compute_fn_return_status),
shape_infer_fn_(shape_infer_fn) {
ParseArgs<Args...>(input_types_, output_types_);

OrtCustomOp::KernelComputeV2 = [](void* op_kernel, OrtKernelContext* context) -> OrtStatusPtr {
Expand Down Expand Up @@ -985,8 +1005,9 @@ struct OrtLiteCustomStruct : public OrtLiteCustomOp {
};

OrtLiteCustomStruct(const char* op_name,
const char* execution_provider) : OrtLiteCustomOp(op_name,
execution_provider) {
const char* execution_provider,
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, start_ver, end_ver) {
SetCompute(&CustomOp::Compute);

OrtCustomOp::CreateKernel = [](const OrtCustomOp* this_, const OrtApi* ort_api, const OrtKernelInfo* info) {
Expand Down Expand Up @@ -1049,25 +1070,31 @@ template <typename... Args>
OrtLiteCustomOp* CreateLiteCustomOp(const char* op_name,
const char* execution_provider,
void (*custom_compute_fn)(Args...),
Status (*shape_infer_fn)(ShapeInferContext&) = {}) {
Status (*shape_infer_fn)(ShapeInferContext&) = {},
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) {
using LiteOp = OrtLiteCustomFunc<Args...>;
return std::make_unique<LiteOp>(op_name, execution_provider, custom_compute_fn, shape_infer_fn).release();
return std::make_unique<LiteOp>(op_name, execution_provider, custom_compute_fn, shape_infer_fn, start_ver, end_ver).release();
}

template <typename... Args>
OrtLiteCustomOp* CreateLiteCustomOp(const char* op_name,
const char* execution_provider,
Status (*custom_compute_fn_v2)(Args...),
Status (*shape_infer_fn)(ShapeInferContext&) = {}) {
Status (*shape_infer_fn)(ShapeInferContext&) = {},
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) {
using LiteOp = OrtLiteCustomFunc<Args...>;
return std::make_unique<LiteOp>(op_name, execution_provider, custom_compute_fn_v2, shape_infer_fn).release();
return std::make_unique<LiteOp>(op_name, execution_provider, custom_compute_fn_v2, shape_infer_fn, start_ver, end_ver).release();
}

template <typename CustomOp>
OrtLiteCustomOp* CreateLiteCustomOp(const char* op_name,
const char* execution_provider) {
const char* execution_provider,
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) {
using LiteOp = OrtLiteCustomStruct<CustomOp>;
return std::make_unique<LiteOp>(op_name, execution_provider).release();
return std::make_unique<LiteOp>(op_name, execution_provider, start_ver, end_ver).release();
}

} // namespace Custom
Expand Down
1 change: 1 addition & 0 deletions js/web/docs/webgpu-operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ Do not modify directly.*
| Expand | ai.onnx(8-12,13+) | |
| Flatten | ai.onnx(1-8,9-10,11-12,13+) | |
| Floor | ai.onnx(6-12,13+) | |
| FusedConv | com.microsoft(1+) | |
| Gather | ai.onnx(1-10,11-12,13+) | |
| GatherElements | ai.onnx(11-12,13+) | |
| Gelu | com.microsoft(1+) | |
Expand Down
1 change: 1 addition & 0 deletions js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ export const WEBGPU_OP_RESOLVE_RULES: Map<string, OperatorImplementation> = new
['Exp', [unaryOps.exp]],
['Expand', [expand]],
['Floor', [unaryOps.floor]],
['FusedConv', [conv, parseConvAttributes]],
['Gather', [gather, parseGatherAttributes]],
['GatherElements', [gatherElements, parseGatherElementsAttributes]],
['Gelu', [unaryOps.gelu]],
Expand Down
4 changes: 2 additions & 2 deletions js/web/lib/wasm/jsep/webgpu/ops/3rd-party/activation_util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ export const activationFnSnippet =
if (!activation) {
return '';
}

// TODO: add implementations
return '';
};

export const biasActivationSnippet = (hasBias: boolean, activation?: Activation): string => `
${hasBias ? 'value = value + getBiasByOutputCoords(coords);' : ''}
${activation ? 'value = activation(value, coords);' : ''}
// TODO uncomment the following line when activation is supported above.
// ${activation ? 'value = activation(value, coords);' : ''}
`;
5 changes: 3 additions & 2 deletions js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv2d_mm_webgpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,9 @@ export const createConv2DMatMulProgramInfo =
${declareFunctions}
${
conv2dCommonSnippet(
isChannelsLast, fitAOuter, fitBOuter, fitInner, hasBias, undefined, false, elementsSize[0],
elementsSize[1], elementsSize[2], t)}
isChannelsLast, fitAOuter, fitBOuter, fitInner, hasBias,
attributes.activation.toLowerCase() as Activation, false, elementsSize[0], elementsSize[1],
elementsSize[2], t)}
${
isVec4 ?
makeMatMulPackedVec4Source(elementsPerThread, workGroupSize, t, undefined, !isChannelsLast, tileInner) :
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,9 @@ export const createConv2DTransposeMatMulProgramInfo =
const dimBOuter : i32 = ${dimBOuter};
const dimInner : i32 = ${dimInner};
${declareFunctions}
${conv2dTransposeCommonSnippet(isChannelsLast, hasBias, undefined, false, innerElementSize)}
${
conv2dTransposeCommonSnippet(
isChannelsLast, hasBias, attributes.activation.toLowerCase() as Activation, false, innerElementSize)}
${
isVec4 ? makeMatMulPackedVec4Source(
elementsPerThread, workGroupSize, 'f32', undefined, !isChannelsLast, tileInner) :
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import {TensorView} from '../../../tensor-view';
import {ShapeUtil} from '../../../util';
import {ProgramInfo} from '../../types';
import {getBroadcastDims, IndicesHelper, inputVariable, outputVariable, ShaderHelper, tensorTypeToWsglStorageType} from '../common';
import {getActicationSnippet, InternalActivationAttributes} from '../fuse-utils';
import {getActivationSnippet, InternalActivationAttributes} from '../fuse-utils';

import {typeSnippet} from './activation_util';

Expand Down Expand Up @@ -440,7 +440,7 @@ export const createMatmulProgramInfo =
const dimInner = aShape[aShape.length - 1];
const dimBOuter = bShape[bShape.length - 1];
const isVec4 = dimInner % 4 === 0 && dimBOuter % 4 === 0;
const {activationFunction, applyActivation} = getActicationSnippet(activationAttributes);
const {activationFunction, applyActivation} = getActivationSnippet(activationAttributes, isVec4);

// TODO: fine tune size
const elementsPerThread = dimAOuter <= 8 ? [4, 1, 1] : [4, 4, 1];
Expand Down Expand Up @@ -473,8 +473,8 @@ export const createMatmulProgramInfo =
const dimBOuter: i32 = ${dimBOuter};
const dimInner: i32 = ${dimInner};
${shaderHelper.declareVariables(...inputVariables, output)}
${declareFunctions}
${activationFunction}
${declareFunctions}
${
isVec4 ? makeMatMulPackedVec4Source(elementsPerThread, workgroupSize, dataType, batchDims) :
makeMatMulPackedSource(elementsPerThread, workgroupSize, dataType, batchDims)}
Expand Down
Loading

0 comments on commit e7e8725

Please sign in to comment.