From 986d0830c005811882c00cbea31445989d76385f Mon Sep 17 00:00:00 2001 From: Feiyue Chen Date: Mon, 3 Jun 2024 03:09:52 +0000 Subject: [PATCH] Add gather/squeeze/unsqueeze/tile etc ops --- onnxruntime/core/framework/node_unit.cc | 8 +- .../vsinpu/builders/impl/base_op_builder.cc | 18 ++- .../vsinpu/builders/impl/cast_op_builder.h | 43 ++++++ .../vsinpu/builders/impl/clip_op_builder.h | 4 +- .../builders/impl/dequantize_op_builder.h | 1 - .../vsinpu/builders/impl/gather_op_builder.h | 82 ++++++++++ .../vsinpu/builders/impl/norm_op_builder.h | 2 +- .../vsinpu/builders/impl/reduce_op_builder.h | 24 +-- .../vsinpu/builders/impl/resize_op_builder.h | 146 ++++++++++++++++++ .../vsinpu/builders/impl/squeeze_op_builder.h | 85 ++++++++++ .../vsinpu/builders/impl/tile_op_builder.h | 68 ++++++++ .../builders/impl/unsqueeze_op_builder.h | 63 ++++++++ .../vsinpu/builders/op_builder_factory.h | 13 +- .../vsinpu/patches/AccuracyCorrection.patch | 26 ++++ .../vsinpu/patches/int8_checker_hack.patch | 22 --- ...g.patch => local_testing_record_res.patch} | 0 .../core/providers/vsinpu/vsinpu_ep_graph.h | 2 +- .../core/providers/vsinpu/vsinpu_util.cc | 7 +- 18 files changed, 565 insertions(+), 49 deletions(-) create mode 100644 onnxruntime/core/providers/vsinpu/builders/impl/cast_op_builder.h create mode 100644 onnxruntime/core/providers/vsinpu/builders/impl/gather_op_builder.h create mode 100644 onnxruntime/core/providers/vsinpu/builders/impl/resize_op_builder.h create mode 100644 onnxruntime/core/providers/vsinpu/builders/impl/squeeze_op_builder.h create mode 100644 onnxruntime/core/providers/vsinpu/builders/impl/tile_op_builder.h create mode 100644 onnxruntime/core/providers/vsinpu/builders/impl/unsqueeze_op_builder.h create mode 100644 onnxruntime/core/providers/vsinpu/patches/AccuracyCorrection.patch delete mode 100644 onnxruntime/core/providers/vsinpu/patches/int8_checker_hack.patch rename onnxruntime/core/providers/vsinpu/patches/{hack_for_testing.patch => local_testing_record_res.patch} (100%) diff --git a/onnxruntime/core/framework/node_unit.cc b/onnxruntime/core/framework/node_unit.cc index 174942b9033d0..1fccefd141333 100644 --- a/onnxruntime/core/framework/node_unit.cc +++ b/onnxruntime/core/framework/node_unit.cc @@ -284,7 +284,7 @@ void NodeUnit::InitForSingleNode() { const auto& input_defs = target_node_.InputDefs(); const auto& output_defs = target_node_.OutputDefs(); auto qlinear_type = GetQLinearOpType(target_node_); - if (qlinear_type == QLinearOpType::Unknown || IsVariadicQLinearOp(qlinear_type)) { // TODO, add variadic support + if (qlinear_type == QLinearOpType::Unknown) { // Not a Qlinear op, add all inputs / outputs auto add_all_io = [](std::vector& defs, const ConstPointerContainer>& node_defs) { @@ -334,6 +334,12 @@ void NodeUnit::InitForSingleNode() { outputs_.push_back(NodeUnitIODef{*output_defs[0], NodeUnitIODef::QuantParam{*input_defs[1], input_defs.size() == 3 ? input_defs[2] : nullptr}}); + } else if (IsVariadicQLinearOp(qlinear_type)) { + int input_num = (input_defs.size() - 2) / 3; + for (int i = 0; i < input_num; i++) { + inputs_.push_back(NodeUnitIODef{*input_defs[3 * i + 2], NodeUnitIODef::QuantParam{*input_defs[3 * i + 3], input_defs[3 * i + 4]}}); + } + outputs_.push_back(NodeUnitIODef{*output_defs[0], NodeUnitIODef::QuantParam{*input_defs[0], input_defs[1]}}); } else { ORT_THROW("The QLinear op [", static_cast(qlinear_type), "] is not supported"); } diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/vsinpu/builders/impl/base_op_builder.cc index b3a3fa40a51c7..ba00dbf4f6baf 100644 --- a/onnxruntime/core/providers/vsinpu/builders/impl/base_op_builder.cc +++ b/onnxruntime/core/providers/vsinpu/builders/impl/base_op_builder.cc @@ -49,13 +49,13 @@ bool BaseOpBuilder::HasSupportedInputOutputs(const InitializedTensorSet& initial return false; } - // We do not support dynamic shape input yet + // We do not support dynamic shape input yet, but resize op's second input can be empty cause we not care about this value for (const auto& dim : shape_proto->dim()) { if (!dim.has_dim_value()) { LOGS_DEFAULT(WARNING) << "Dynamic shape is not supported for now, for input:" << node_arg.Name(); return false; } - if (dim.dim_value() == 0) { + if (dim.dim_value() == 0 && op_type != "Resize") { LOGS_DEFAULT(WARNING) << "Zero in shape is not supported for now, for input:" << node_arg.Name(); return false; } @@ -91,6 +91,10 @@ bool BaseOpBuilder::HasSupportedInputOutputs(const InitializedTensorSet& initial return false; if (!has_initialized_quant_param(*input.quant_param->zero_point, initializers)) return false; + if (input.quant_param->zero_point->Type() != input.node_arg.Type()) { + LOGS_DEFAULT(ERROR) << "Invalid input type because the data type mismatch with its' quant param type."; + return false; + } } } } @@ -115,7 +119,7 @@ bool BaseOpBuilder::HasSupportedInputOutputs(const InitializedTensorSet& initial bool BaseOpBuilder::HasSupportedInputOutputsImpl( const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit) const { - // Check input data type, int64 is generally unsupported + // Check input/output data type, int64 is generally unsupported // specific op builder can override this if the int64 input corresponds to VSINPU param for (const auto& input : node_unit.Inputs()) { auto input_type = input.node_arg.Type(); @@ -125,6 +129,14 @@ bool BaseOpBuilder::HasSupportedInputOutputsImpl( return false; } } + for (const auto& output : node_unit.Outputs()) { + auto output_type = output.node_arg.Type(); + if (*output_type == "tensor(int64)" || !util::IsTypeSupported(&output.node_arg)) { + LOGS_DEFAULT(WARNING) << node_unit.OpType() << " has unsupported output type : " + << *output_type; + return false; + } + } return true; } diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/cast_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/cast_op_builder.h new file mode 100644 index 0000000000000..68c9702c8d9ba --- /dev/null +++ b/onnxruntime/core/providers/vsinpu/builders/impl/cast_op_builder.h @@ -0,0 +1,43 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ +#include "core/providers/shared/utils/utils.h" +#include "core/providers/vsinpu/builders/impl/base_op_builder.h" +namespace onnxruntime { +namespace vsi { +namespace npu { +class CastOpBuilder : public BaseOpBuilder { + protected: + bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, std::vector>& inputs, + std::vector>& outputs, const NodeUnit& node_unit) override { + LOGS_DEFAULT(VERBOSE) << "Creating Cast Op."; + NodeAttrHelper helper(node_unit.GetNode()); + auto op = graph_ep->GetGraph()->CreateOperation(); + (*op).BindInput(inputs[0]).BindOutputs(outputs); + return true; + } +}; + +} // namespace npu +} // namespace vsi +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/clip_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/clip_op_builder.h index a91bd6ecf5d3e..9ad1ae6c82009 100644 --- a/onnxruntime/core/providers/vsinpu/builders/impl/clip_op_builder.h +++ b/onnxruntime/core/providers/vsinpu/builders/impl/clip_op_builder.h @@ -28,9 +28,9 @@ namespace onnxruntime { namespace vsi { namespace npu { class ClipOpBuilder final : public BaseOpBuilder { - bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer, + bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer, const Node* node) const override { - if (node->SinceVersion() > 6) { + if (node->SinceVersion() > 6) { if (node->InputDefs().size() > 1 && !Contains(graph_viewer.GetAllInitializedTensors(), node->InputDefs()[1]->Name())) { LOGS_DEFAULT(WARNING) << "Min/Max value must be const input or attribute."; return false; diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/dequantize_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/dequantize_op_builder.h index 9774dc81ece88..760aed1fd3ecc 100644 --- a/onnxruntime/core/providers/vsinpu/builders/impl/dequantize_op_builder.h +++ b/onnxruntime/core/providers/vsinpu/builders/impl/dequantize_op_builder.h @@ -36,7 +36,6 @@ class DequantizeLinearOpBuilder : public BaseOpBuilder { }; bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit) const override { - auto input_type = node_unit.Inputs()[0].node_arg.Type(); if (*input_type == "tensor(int64)" || !util::IsTypeSupported(&node_unit.Inputs()[0].node_arg)) { LOGS_DEFAULT(WARNING) << node_unit.OpType() << " has unsupported input type : " diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/gather_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/gather_op_builder.h new file mode 100644 index 0000000000000..97818927322e4 --- /dev/null +++ b/onnxruntime/core/providers/vsinpu/builders/impl/gather_op_builder.h @@ -0,0 +1,82 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ +#include "core/providers/vsinpu/builders/impl/base_op_builder.h" +#include "core/providers/shared/utils/utils.h" + +namespace onnxruntime { +namespace vsi { +namespace npu { +class GatherOpBuilder : public BaseOpBuilder { + bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers, + const NodeUnit& node_unit) const override { + auto input = node_unit.Inputs()[0]; + auto indices = node_unit.Inputs()[1]; + if (util::IsTypeSupported(&input.node_arg) && util::IsTypeSupported(&indices.node_arg)) { + if (*input.node_arg.Type() == "tensor(int64)") { + LOGS_DEFAULT(WARNING) << "Only support indices tensor to be int64 type in gather op."; + return false; + } + if (*indices.node_arg.Type() != "tensor(int64)" && *indices.node_arg.Type() != "tensor(int32)") { + LOGS_DEFAULT(WARNING) << "Unsupported indices tensor type in gather op."; + return false; + } + if (*indices.node_arg.Type() == "tensor(int64)" && !Contains(initializers, indices.node_arg.Name())) { + LOGS_DEFAULT(WARNING) << "Only support const attribute if indice tensor is in int64 type."; + return false; + } + return true; + } + return false; + } + + bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, + std::vector>& inputs, + std::vector>& outputs, + const NodeUnit& node_unit) override { + LOGS_DEFAULT(VERBOSE) << "Creating Gather Op."; + NodeAttrHelper helper(node_unit.GetNode()); + auto axis = helper.Get("axis", 0); + axis = util::ReverseAxis(axis, inputs[0]->GetShape().size()); + auto op = graph_ep->GetGraph()->CreateOperation(axis, 0); + + bool is_i64_indices = inputs[1]->GetDataType() == tim::vx::DataType::INT64; + if (!is_i64_indices) { + (*op).BindInputs(inputs).BindOutputs(outputs); + } else { + std::vector origin_data(inputs[1]->GetSpec().GetElementNum()); + inputs[1]->CopyDataFromTensor(origin_data.data()); + std::vector transformed_data(origin_data.begin(), origin_data.end()); + auto transformed_indices = graph_ep->GetGraph()->CreateTensor( + inputs[1]->GetSpec().SetAttribute(tim::vx::TensorAttribute::INPUT).SetDataType(tim::vx::DataType::INT32), transformed_data.data()); + (*op).BindInput(inputs[0]).BindInput(transformed_indices).BindOutput(outputs[0]); + } + graph_ep->GetOps().push_back(std::move(op)); + return true; + } +}; + +} // namespace npu + +} // namespace vsi +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/norm_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/norm_op_builder.h index 52c85e9ff698c..f9d7dbe7183fc 100644 --- a/onnxruntime/core/providers/vsinpu/builders/impl/norm_op_builder.h +++ b/onnxruntime/core/providers/vsinpu/builders/impl/norm_op_builder.h @@ -36,7 +36,7 @@ class BatchNormOpBuilder : public BaseOpBuilder { mean_tensor = 3, var_tensor = 4 }; - int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const override{ return 9; } + int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const override { return 9; } bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer, const Node* node) const override { diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/reduce_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/reduce_op_builder.h index 607040cfdaf3a..0ddf106da75b6 100644 --- a/onnxruntime/core/providers/vsinpu/builders/impl/reduce_op_builder.h +++ b/onnxruntime/core/providers/vsinpu/builders/impl/reduce_op_builder.h @@ -38,9 +38,9 @@ class ReduceMeanOpBuilder : public BaseOpBuilder { return true; } bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, - std::vector>& inputs, - std::vector>& outputs, - const NodeUnit& node_unit) override { + std::vector>& inputs, + std::vector>& outputs, + const NodeUnit& node_unit) override { LOGS_DEFAULT(INFO) << "Creating ReduceMean Op."; NodeAttrHelper helper(node_unit.GetNode()); @@ -48,22 +48,22 @@ class ReduceMeanOpBuilder : public BaseOpBuilder { auto input_shape_size = inputs[0]->GetShape().size(); if (node_unit.SinceVersion() < 18 && helper.HasAttr("axes")) { - def_axes = helper.Get("axes", def_axes); + def_axes = helper.Get("axes", def_axes); } else if (inputs.size() > 1) { - def_axes.resize(inputs[1]->GetSpec().GetElementNum()); - inputs[1]->CopyDataFromTensor(def_axes.data()); + def_axes.resize(inputs[1]->GetSpec().GetElementNum()); + inputs[1]->CopyDataFromTensor(def_axes.data()); } else { - for (int64_t i = 0; i < input_shape_size; ++i) { - def_axes.push_back(i); - } + for (int64_t i = 0; i < input_shape_size; ++i) { + def_axes.push_back(i); + } } std::vector axes(def_axes.begin(), def_axes.end()); axes = util::ReverseAxis(axes, input_shape_size); if (helper.HasAttr("noop_with_empty_axes") && inputs.size() == 1 && helper.Get("noop_with_empty_axes", 0) == 1) { - outputs[0] = inputs[0]; - return true; + outputs[0] = inputs[0]; + return true; } bool keepdims = helper.Get("keepdims", 1) == 1; @@ -71,7 +71,7 @@ class ReduceMeanOpBuilder : public BaseOpBuilder { (*op).BindInput(inputs[0]).BindOutputs(outputs); graph_ep->GetOps().push_back(std::move(op)); return true; -} + } }; } // namespace npu diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/resize_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/resize_op_builder.h new file mode 100644 index 0000000000000..cc49461940aed --- /dev/null +++ b/onnxruntime/core/providers/vsinpu/builders/impl/resize_op_builder.h @@ -0,0 +1,146 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ +#include "core/providers/vsinpu/builders/impl/base_op_builder.h" +#include "core/providers/shared/utils/utils.h" + +namespace onnxruntime { +namespace vsi { +namespace npu { +class ResizeOpBuilder : public BaseOpBuilder { + bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers, + const NodeUnit& node_unit) const override { + auto input_type = node_unit.Inputs()[0].node_arg.Type(); + if (*input_type == "tensor(int64)" || !util::IsTypeSupported(&node_unit.Inputs()[0].node_arg)) { + LOGS_DEFAULT(WARNING) << node_unit.OpType() << " has unsupported input type : " + << *input_type; + return false; + } + if (node_unit.SinceVersion() > 10) { + if (node_unit.Inputs().size() > 2 && !Contains(initializers, node_unit.Inputs()[2].node_arg.Name())) { + LOGS_DEFAULT(WARNING) << "Scale tensor must be constant."; + return false; + } + if (node_unit.Inputs().size() > 3 && !Contains(initializers, node_unit.Inputs()[3].node_arg.Name())) { + LOGS_DEFAULT(WARNING) << "Size tensor must be constant."; + return false; + } + } else { + if (!Contains(initializers, node_unit.Inputs()[1].node_arg.Name())) { + LOGS_DEFAULT(WARNING) << "Scale tensor must be constant."; + return false; + } + } + return true; + } + bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer, const Node* node) const override { + auto shape = vsi::npu::util::GetTensorShape(*node->InputDefs()[0]); + if (shape.NumDimensions() > 4) { + LOGS_DEFAULT(WARNING) << "3D or more dimesions resize is not supported."; + return false; + } + + NodeAttrHelper helper(*node); + if (helper.Get("antialiax", 0) != 0) { + LOGS_DEFAULT(WARNING) << "Antialias attribute is not supported."; + return false; + } + auto& cooridinate = helper.Get("coordinate_transoformation_mode", "half_pixel"); + if (cooridinate != "align_corners" && cooridinate != "half_pixel") { + LOGS_DEFAULT(WARNING) << "Only support half_pixel and align_corners attributes now."; + return false; + } + if (helper.Get("keep_aspect_ratio_policy", "stretch") != "stretch") { + LOGS_DEFAULT(WARNING) << "Not support to keep aspect ratio."; + return false; + } + if (helper.Get("mode", "nearest") == "cubic") { + LOGS_DEFAULT(WARNING) << "Not support the cubic resize type yet."; + return false; + } + if (helper.HasAttr("axes")) { + LOGS_DEFAULT(WARNING) << "Axes-specifying is not support."; + return false; + } + return true; + } + + bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, + std::vector>& inputs, + std::vector>& outputs, + const NodeUnit& node_unit) override { + LOGS_DEFAULT(VERBOSE) << "Creating Resize Op."; + auto inputs_num = inputs.size(); + bool is_1dresize = inputs[0]->GetShape().size() == 1; + NodeAttrHelper helper(node_unit.GetNode()); + auto onnx_mode = helper.Get("mode", "nearest"); + auto coordinate_transformation = helper.Get("coordinate_transformation_mode", "half_pixel"); + bool is_size_set = helper.HasAttr("size"); + int32_t scale_index = node_unit.SinceVersion() > 10 ? 2 : 1; + + auto resize_type = onnx_mode == "nearest" ? tim::vx::ResizeType::NEAREST_NEIGHBOR : tim::vx::ResizeType::BILINEAR; + bool align_corners = coordinate_transformation == "align_corners"; + bool half_pixel_center = coordinate_transformation == "half_pixel"; + std::shared_ptr op = nullptr; + if (is_1dresize) { + int target_size; + if (is_size_set) { + int64_t onnx_size; + inputs[3]->CopyDataFromTensor(&onnx_size); + target_size = static_cast(onnx_size); + op = graph_ep->GetGraph()->CreateOperation(resize_type, 0.0f, align_corners, half_pixel_center, target_size); + } else { + float scale; + inputs[scale_index]->CopyDataFromTensor(&scale); + op = graph_ep->GetGraph()->CreateOperation(resize_type, scale, align_corners, half_pixel_center, 0); + } + } else { + int target_height, target_width; + if (is_size_set) { + std::vector onnx_sizes(inputs[3]->GetShape().size()); + inputs[3]->CopyDataFromTensor(onnx_sizes.data()); + target_height = static_cast(onnx_sizes[1]); + target_width = static_cast(onnx_sizes[0]); + op = graph_ep->GetGraph()->CreateOperation(resize_type, 0.0f, align_corners, half_pixel_center, target_height, target_width); + } else { + auto input_shape = inputs[0]->GetShape(); + std::vector scales(input_shape.size()); + std::vector out_shape(input_shape.size()); + inputs[scale_index]->CopyDataFromTensor(scales.data()); + for (int i = 0; i < input_shape.size(); i++) { + out_shape[i] = input_shape[i] * scales[input_shape.size() - 1 - i]; + } + op = graph_ep->GetGraph()->CreateOperation(resize_type, 0, align_corners, half_pixel_center, out_shape[1], out_shape[0]); + } + } + + (*op).BindInput(inputs[0]).BindOutputs(outputs); + graph_ep->GetOps().push_back(std::move(op)); + return true; + } +}; + +} // namespace npu + +} // namespace vsi +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/squeeze_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/squeeze_op_builder.h new file mode 100644 index 0000000000000..54174060c6381 --- /dev/null +++ b/onnxruntime/core/providers/vsinpu/builders/impl/squeeze_op_builder.h @@ -0,0 +1,85 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ +#include "core/providers/vsinpu/builders/impl/base_op_builder.h" +#include "core/providers/shared/utils/utils.h" + +namespace onnxruntime { +namespace vsi { +namespace npu { +class SqueezeOpBuilder : public BaseOpBuilder { + bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers, + const NodeUnit& node_unit) const override { + auto input_type = node_unit.Inputs()[0].node_arg.Type(); + if (*input_type == "tensor(int64)" || !util::IsTypeSupported(&node_unit.Inputs()[0].node_arg)) { + LOGS_DEFAULT(WARNING) << node_unit.OpType() << " has unsupported input type : " + << *input_type; + return false; + } + if (node_unit.SinceVersion() > 11) { + if (node_unit.Inputs().size() > 1 && !Contains(initializers, node_unit.Inputs()[1].node_arg.Name())) { + LOGS_DEFAULT(WARNING) << "Only support const axes in Squeeze op."; + return false; + } + } + return true; + } + + bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, + std::vector>& inputs, + std::vector>& outputs, + const NodeUnit& node_unit) override { + LOGS_DEFAULT(INFO) << "Creating Squeeze Op."; + + NodeAttrHelper helper(node_unit.GetNode()); + std::vector def_axes; + auto input_shape_size = inputs[0]->GetShape().size(); + + if (node_unit.SinceVersion() < 13 && helper.HasAttr("axes")) { + def_axes = helper.Get("axes", def_axes); + } else if (inputs.size() > 1) { + def_axes.resize(inputs[1]->GetSpec().GetElementNum()); + inputs[1]->CopyDataFromTensor(def_axes.data()); + } else { // if axes is empty from onnx, check input shape to determine + for (int64_t i = 0; i < input_shape_size; ++i) { + if (inputs[0]->GetShape()[i] == 1) { + def_axes.push_back(i); + } + } + } + + std::vector axes(def_axes.begin(), def_axes.end()); + axes = util::ReverseAxis(axes, input_shape_size); + + std::vector timvx_axes(axes.begin(), axes.end()); + + auto op = graph_ep->GetGraph()->CreateOperation(timvx_axes); + (*op).BindInput(inputs[0]).BindOutputs(outputs); + graph_ep->GetOps().push_back(std::move(op)); + return true; + } +}; +} // namespace npu + +} // namespace vsi +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/tile_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/tile_op_builder.h new file mode 100644 index 0000000000000..cb81ab82f7c88 --- /dev/null +++ b/onnxruntime/core/providers/vsinpu/builders/impl/tile_op_builder.h @@ -0,0 +1,68 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ +#include "core/providers/vsinpu/builders/impl/base_op_builder.h" + +namespace onnxruntime { +namespace vsi { +namespace npu { +class TileOpBuilder : public BaseOpBuilder { + int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const override { return 6; } + + bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers, + const NodeUnit& node_unit) const override { + auto input = node_unit.Inputs()[0]; + auto multipliers = node_unit.Inputs()[1]; + if (initializers.end() == initializers.find(multipliers.node_arg.Name())) { + LOGS_DEFAULT(WARNING) << "Multipliers of tile op must be known."; + return false; + } + if (util::IsTypeSupported(&input.node_arg) && util::IsTypeSupported(&multipliers.node_arg)) { + if (*input.node_arg.Type() != "tensor(int64)") { + return true; + } + } + LOGS_DEFAULT(WARNING) << "Input type not supported."; + return false; + } + + bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, + std::vector>& inputs, + std::vector>& outputs, + const NodeUnit& node_unit) override { + LOGS_DEFAULT(VERBOSE) << "Creating Tile Op."; + std::vector multipliers(inputs[1]->GetShape()[0]); + inputs[1]->CopyDataFromTensor(multipliers.data()); + std::reverse(multipliers.begin(), multipliers.end()); + std::vector timvx_multipliers(multipliers.begin(), multipliers.end()); + auto op = graph_ep->GetGraph()->CreateOperation(timvx_multipliers); + (*op).BindInput(inputs[0]).BindOutputs(outputs); + graph_ep->GetOps().push_back(std::move(op)); + return true; + } +}; + +} // namespace npu + +} // namespace vsi +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/unsqueeze_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/unsqueeze_op_builder.h new file mode 100644 index 0000000000000..40fe93c6b8454 --- /dev/null +++ b/onnxruntime/core/providers/vsinpu/builders/impl/unsqueeze_op_builder.h @@ -0,0 +1,63 @@ +#include "core/providers/vsinpu/builders/impl/base_op_builder.h" +#include "core/providers/shared/utils/utils.h" + +namespace onnxruntime { +namespace vsi { +namespace npu { +class UnsqueezeOpBuilder : public BaseOpBuilder { + bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers, + const NodeUnit& node_unit) const override { + auto input_type = node_unit.Inputs()[0].node_arg.Type(); + if (*input_type == "tensor(int64)" || !util::IsTypeSupported(&node_unit.Inputs()[0].node_arg)) { + LOGS_DEFAULT(WARNING) << node_unit.OpType() << " has unsupported input type : " + << *input_type; + return false; + } + if (node_unit.SinceVersion() > 11 && !Contains(initializers, node_unit.Inputs()[1].node_arg.Name())) { + LOGS_DEFAULT(WARNING) << "Only support const axes in Unsqueeze op."; + return false; + } + return true; + } + + bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, + std::vector>& inputs, + std::vector>& outputs, + const NodeUnit& node_unit) override { + LOGS_DEFAULT(INFO) << "Creating Unsqueeze Op."; + + NodeAttrHelper helper(node_unit.GetNode()); + std::vector def_axes; + auto input_shape_size = inputs[0]->GetShape().size(); + + if (node_unit.SinceVersion() < 13 && helper.HasAttr("axes")) { + def_axes = helper.Get("axes", def_axes); + } else if (inputs.size() > 1) { + def_axes.resize(inputs[1]->GetSpec().GetElementNum()); + inputs[1]->CopyDataFromTensor(def_axes.data()); + } else { // if axes is empty from onnx, check input shape to determine + for (int64_t i = 0; i < input_shape_size; ++i) { + if (inputs[0]->GetShape()[i] == 1) { + def_axes.push_back(i); + } + } + } + + std::vector axes(def_axes.begin(), def_axes.end()); + axes = util::ReverseAxis(axes, input_shape_size + axes.size()); + + std::vector timvx_axes(inputs[0]->GetShape().begin(), inputs[0]->GetShape().end()); + for (int32_t dim : axes) { + timvx_axes.insert(timvx_axes.begin() + dim, 1); + } + + auto op = graph_ep->GetGraph()->CreateOperation(timvx_axes); + (*op).BindInput(inputs[0]).BindOutputs(outputs); + graph_ep->GetOps().push_back(std::move(op)); + return true; + } +}; +} // namespace npu + +} // namespace vsi +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/vsinpu/builders/op_builder_factory.h b/onnxruntime/core/providers/vsinpu/builders/op_builder_factory.h index 3ab865d18f3a3..6538ee3078505 100644 --- a/onnxruntime/core/providers/vsinpu/builders/op_builder_factory.h +++ b/onnxruntime/core/providers/vsinpu/builders/op_builder_factory.h @@ -42,7 +42,12 @@ #include "impl/qlinearmatmul_op_builder.h" #include "impl/qlinear_binary_op_builder.h" #include "impl/qlinearconcat_op_builder.h" - +#include "impl/gather_op_builder.h" +#include "impl/tile_op_builder.h" +#include "impl/squeeze_op_builder.h" +#include "impl/unsqueeze_op_builder.h" +#include "impl/resize_op_builder.h" +#include "impl/cast_op_builder.h" namespace onnxruntime { namespace vsi { namespace npu { @@ -94,6 +99,12 @@ static const std::map reg = { REGISTER_OP_BUILDER("QLinearAdd", QLinearAddOpBuilder), REGISTER_OP_BUILDER("QLinearMul", QLinearMulOpBuilder), REGISTER_OP_BUILDER("QLinearConcat", QLinearConcatOpBuilder), + REGISTER_OP_BUILDER("Gather", GatherOpBuilder), + REGISTER_OP_BUILDER("Tile", TileOpBuilder), + REGISTER_OP_BUILDER("Squeeze", SqueezeOpBuilder), + REGISTER_OP_BUILDER("Unsqueeze", UnsqueezeOpBuilder), + REGISTER_OP_BUILDER("Resize", ResizeOpBuilder), + REGISTER_OP_BUILDER("Cast", CastOpBuilder), #undef REGISTER_OP_BUILDER }; diff --git a/onnxruntime/core/providers/vsinpu/patches/AccuracyCorrection.patch b/onnxruntime/core/providers/vsinpu/patches/AccuracyCorrection.patch new file mode 100644 index 0000000000000..d44190101d9fa --- /dev/null +++ b/onnxruntime/core/providers/vsinpu/patches/AccuracyCorrection.patch @@ -0,0 +1,26 @@ +diff --git a/onnxruntime/test/providers/checkers.cc b/onnxruntime/test/providers/checkers.cc +index 47c18c478d..93b44501cd 100644 +--- a/onnxruntime/test/providers/checkers.cc ++++ b/onnxruntime/test/providers/checkers.cc +@@ -195,7 +195,7 @@ struct TensorCheck { + // For any other EPs, we still expect an exact match for the results + // TODO: Verify if DML can possibly have a ROUNDING_MODE parameter and conform to the other EPs #41968513 + if ((provider_type == kNnapiExecutionProvider || provider_type == kDmlExecutionProvider || +- provider_type == kXnnpackExecutionProvider) && ++ provider_type == kXnnpackExecutionProvider || provider_type == kVSINPUExecutionProvider) && + (has_abs_err || has_rel_err)) { + double threshold = has_abs_err ? *(params.absolute_error) + : 0.0; +diff --git a/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc b/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc +index 2bc0df5e36..7beb78c2ff 100644 +--- a/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc ++++ b/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc +@@ -498,7 +498,7 @@ class QLinearConvOpTester { + // NOTE, for now the tolerance will only apply if the NNAPI is actually used, + // if for any reason the execution falls back to CPU, we still expect an exact match + // See, 'void Check(...' in onnxruntime/test/providers/provider_test_utils.cc +-#if defined(USE_NNAPI) || defined(USE_DML) ++#if defined(USE_NNAPI) || defined(USE_DML) || defined(USE_VSINPU) + // TODO: Verify if DML can possibly have a ROUNDING_MODE parameter and conform to the other EPs #41968513 + abs_error = 1.0f; + #endif diff --git a/onnxruntime/core/providers/vsinpu/patches/int8_checker_hack.patch b/onnxruntime/core/providers/vsinpu/patches/int8_checker_hack.patch deleted file mode 100644 index a94bf4363e168..0000000000000 --- a/onnxruntime/core/providers/vsinpu/patches/int8_checker_hack.patch +++ /dev/null @@ -1,22 +0,0 @@ -diff --git a/onnxruntime/test/providers/checkers.cc b/onnxruntime/test/providers/checkers.cc -index f1a7240ea3..436031dfa8 100644 ---- a/onnxruntime/test/providers/checkers.cc -+++ b/onnxruntime/test/providers/checkers.cc -@@ -154,6 +154,7 @@ struct TensorCheck { - } - - const bool has_abs_err = params.absolute_error.has_value(); -+ const int8_t default_abs_err = 1; - if (has_abs_err) { - double threshold = *(params.absolute_error); - -@@ -162,7 +163,8 @@ struct TensorCheck { - } - } else { - for (int i = 0; i < size; ++i) { -- EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i; -+ // EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i; -+ EXPECT_NEAR(cur_expected[i], cur_actual[i], default_abs_err) << "i:" << i; - } - } - } diff --git a/onnxruntime/core/providers/vsinpu/patches/hack_for_testing.patch b/onnxruntime/core/providers/vsinpu/patches/local_testing_record_res.patch similarity index 100% rename from onnxruntime/core/providers/vsinpu/patches/hack_for_testing.patch rename to onnxruntime/core/providers/vsinpu/patches/local_testing_record_res.patch diff --git a/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.h b/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.h index 6dbf0c64b9b30..d39ff91e4dfc4 100644 --- a/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.h +++ b/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.h @@ -65,7 +65,7 @@ class GraphEP { bool& GetCompiled() { return compiled_; } std::shared_ptr& GetGraph() { return graph_; } - std::vector>& GetOps() { return ops_;} + std::vector>& GetOps() { return ops_; } std::map>& GetTensors() { return tensors_; } diff --git a/onnxruntime/core/providers/vsinpu/vsinpu_util.cc b/onnxruntime/core/providers/vsinpu/vsinpu_util.cc index 111db391cd8f7..c4b893ffc413e 100644 --- a/onnxruntime/core/providers/vsinpu/vsinpu_util.cc +++ b/onnxruntime/core/providers/vsinpu/vsinpu_util.cc @@ -267,7 +267,6 @@ bool IsTypeSupported(const NodeArg* node_arg) { case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16: case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8: case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8: - case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT16: case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32: case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64: return true; @@ -445,13 +444,11 @@ void GetQuantizationScaleAndZeroPoint( auto zps = unpacked_tensor.DataAsSpan(); std::vector zps_vec(zps.begin(), zps.end()); pcq_zps = onnxruntime::make_optional(std::move(zps_vec)); - } - else if (is_int32_zp) { + } else if (is_int32_zp) { auto zps = unpacked_tensor.DataAsByteSpan(); std::vector zps_vec(zps.begin(), zps.end()); pcq_zps = onnxruntime::make_optional(std::move(zps_vec)); - } - else { + } else { auto zps = unpacked_tensor.DataAsSpan(); std::vector zps_vec(zps.begin(), zps.end()); pcq_zps = onnxruntime::make_optional(std::move(zps_vec));