Ort v1.15.0 ov changes customer a #314

Open · wants to merge 9 commits into base: ort_v1.15.0_ov_changes
7 changes: 5 additions & 2 deletions cmake/CMakeLists.txt
@@ -1164,9 +1164,12 @@ if (onnxruntime_USE_OPENVINO)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0")
set(OPENVINO_VERSION "2023.0")
add_definitions(-DOPENVINO_2023_0=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.1")
set(OPENVINO_VERSION "2023.1")
add_definitions(-DOPENVINO_2023_1=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino")
set(OPENVINO_VERSION "2023.0")
add_definitions(-DOPENVINO_2023_0=1)
set(OPENVINO_VERSION "2023.1")
add_definitions(-DOPENVINO_2023_1=1)
else()
message(FATAL_ERROR "Unsupported OpenVINO version: ${INTEL_OPENVINO_DIR}")
endif()
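The new branch detects OpenVINO 2023.1 from the INTEL_OPENVINO_DIR path and defines OPENVINO_2023_1, which the provider sources test at compile time. A minimal sketch of how such a macro is typically consumed downstream; the helper GetOpenVINOVersionString is hypothetical and shown only for illustration:

// Minimal sketch, assuming the OPENVINO_2023_* macros defined above are
// visible to the translation unit. GetOpenVINOVersionString is a hypothetical
// helper, not part of this PR.
#include <string>

std::string GetOpenVINOVersionString() {
#if defined(OPENVINO_2023_1)
  return "V_2023_1";  // set via add_definitions(-DOPENVINO_2023_1=1)
#elif defined(OPENVINO_2023_0)
  return "V_2023_0";
#elif defined(OPENVINO_2022_3)
  return "V_2022_3";
#else
  return "V_UNKNOWN";
#endif
}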
2 changes: 1 addition & 1 deletion include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -593,7 +593,7 @@ typedef struct OrtOpenVINOProviderOptions {
OrtOpenVINOProviderOptions() : device_type{},
enable_vpu_fast_compile{},
device_id{},
num_of_threads{},
num_of_threads{1},
cache_dir{},
context{},
enable_opencl_throttling{},
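With this change num_of_threads defaults to 1 instead of being zero-initialized. A minimal sketch of how an application fills these options and registers the OpenVINO EP through the ORT C++ wrapper; the model path and thread count are illustrative values only:

// Sketch of consuming the options struct above (not part of the PR).
// AppendExecutionProvider_OpenVINO is the standard ORT C++ wrapper call.
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "ov_ep_example");
  Ort::SessionOptions session_options;

  OrtOpenVINOProviderOptions options{};
  options.device_type = "CPU_FP32";  // target device and precision
  options.num_of_threads = 8;        // overrides the new default of 1
  options.cache_dir = "";            // empty string disables model caching

  session_options.AppendExecutionProvider_OpenVINO(options);
  Ort::Session session(env, "model.onnx", session_options);  // hypothetical model path
  return 0;
}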
13 changes: 4 additions & 9 deletions onnxruntime/core/providers/openvino/backend_manager.cc
@@ -7,9 +7,6 @@
#include <memory>

#include "core/providers/shared_library/provider_api.h"

#include <inference_engine.hpp>

#include "contexts.h"
#include "backend_manager.h"
#include "ibackend.h"
@@ -36,11 +33,11 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
const logging::Logger& logger) {
auto prec_str = GetGlobalContext().precision_str;
if (prec_str == "FP32") {
subgraph_context_.precision = InferenceEngine::Precision::FP32;
subgraph_context_.precision = "FP32";
} else if (prec_str == "FP16") {
subgraph_context_.precision = InferenceEngine::Precision::FP16;
subgraph_context_.precision = "FP16";
} else if (prec_str == "U8") {
subgraph_context_.precision = InferenceEngine::Precision::U8;
subgraph_context_.precision = "U8";
} else {
throw std::string("Invalid OpenVINO Precision type: " + prec_str);
}
@@ -78,7 +75,6 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
if (GetGlobalContext().device_type.find("CPU") != std::string::npos ||
GetGlobalContext().device_type.find("GPU") != std::string::npos) {
if (GetGlobalContext().enable_dynamic_shapes) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
<< "Creating backend Dynamic Shapes";
try {
@@ -90,7 +86,6 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
}
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] "
<< "Backend created for graph " << subgraph_context_.subgraph_name;
}
}
} else {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. Initializing backend for graph " << subgraph_context_.subgraph_name;
@@ -257,7 +252,7 @@ void BackendManager::Compute(OrtKernelContext* context) {
}
#endif
bool use_dynamic_backend = true;
if (GetGlobalContext().enable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape &&
if (subgraph_context_.has_dynamic_input_shape &&

Reviewer comment: For NPU we still need the dynamic backend, so this logic may not be suitable going forward.

Author reply: This logic only skips the check on whether the enable_dynamic_shapes runtime option is provided. With this change, if a model has dynamic-shaped inputs it will still be handled with the dynamic backend.

(GetGlobalContext().device_type.find("CPU") != std::string::npos ||
GetGlobalContext().device_type.find("GPU") != std::string::npos)) {
concrete_backend_->Infer(context);
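Two changes stand out in this file: the precision is now carried as a plain string instead of an InferenceEngine::Precision enum (part of the move to OV API 2.0), and the enable_dynamic_shapes gate is removed so dynamic-shaped subgraphs on CPU/GPU are routed as discussed in the thread above. A standalone sketch of the string-based precision handling; this is not the BackendManager constructor itself, which throws a std::string on error and stores the value in subgraph_context_.precision:

// Standalone sketch of the string-based precision validation shown above.
#include <stdexcept>
#include <string>

std::string ValidatePrecision(const std::string& prec_str) {
  if (prec_str == "FP32" || prec_str == "FP16" || prec_str == "U8") {
    return prec_str;  // stored directly into SubGraphContext::precision
  }
  throw std::runtime_error("Invalid OpenVINO Precision type: " + prec_str);
}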
64 changes: 23 additions & 41 deletions onnxruntime/core/providers/openvino/backend_utils.cc
@@ -8,8 +8,8 @@
#include <fstream>

#include "ov_interface.h"
#include <ngraph/pass/convert_fp32_to_fp16.hpp>
#include <ngraph/pass/constant_folding.hpp>
#include "openvino/pass/convert_fp32_to_fp16.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "core/providers/shared_library/provider_api.h"
#include "backend_utils.h"

@@ -50,14 +50,14 @@ struct static_cast_int64 {
std::shared_ptr<OVNetwork>
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context,
const SubGraphContext& subgraph_context,
std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
if (IsCILogEnabled()) {
std::cout << "CreateNgraphFunc" << std::endl;
}
const std::string model = model_proto.SerializeAsString();
try {
auto cnn_network = global_context.ie_core.ReadModel(model);
if ((subgraph_context.precision == InferenceEngine::Precision::FP16) &&
if ((subgraph_context.precision == "FP16") &&
(global_context.device_type.find("VPUX") == std::string::npos)) {
// FP16 transformations
ov::pass::ConvertFP32ToFP16 pass_obj;
@@ -88,20 +88,19 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
size_t index = results.size() - 1;

for (auto it = results.rbegin(); it != results.rend(); ++it) {
if (auto const_node = std::dynamic_pointer_cast<ngraph::op::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
if (auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {

Reviewer comment: Are we using OV API 1.0 or 2.0?

Author reply: These use API 2.0, as we support only the latest three releases of OpenVINO.

const_outputs_map[(*it)->get_friendly_name()] = const_node;
results.erase(results.begin() + index);
}
--index;
}
}
#ifndef NDEBUG
#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0)
#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1)
if (IsDebugEnabled()) {
std::string name = cnn_network->get_friendly_name();
ov::pass::Serialize serializer(name + ".xml", name + ".bin");
serializer.run_on_model(cnn_network);
ngraph::plot_graph(cnn_network, name + "_executable" + ".dot");
}
#endif
#endif
@@ -111,31 +110,6 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
}
}

InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type) {
ONNX_NAMESPACE::DataType type_string = ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(onnx_type);
if (*type_string == "float" || *type_string == "tensor(float)") {
return InferenceEngine::Precision::FP32;
} else if (*type_string == "float16" || *type_string == "tensor(float16)") {
return InferenceEngine::Precision::FP16;
} else if (*type_string == "int32" || *type_string == "tensor(int32)") {
return InferenceEngine::Precision::I32;
} else if (*type_string == "int16" || *type_string == "tensor(int16)") {
return InferenceEngine::Precision::I16;
} else if (*type_string == "int8" || *type_string == "tensor(int8)") {
return InferenceEngine::Precision::I8;
} else if (*type_string == "uint16" || *type_string == "tensor(uint16)") {
return InferenceEngine::Precision::U16;
} else if (*type_string == "uint8" || *type_string == "tensor(uint8)") {
return InferenceEngine::Precision::U8;
} else if (*type_string == "bool" || *type_string == "tensor(bool)") {
return InferenceEngine::Precision::U8;
} else if (*type_string == "int64" || *type_string == "tensor(int64)") {
return InferenceEngine::Precision::I32;
} else {
throw std::string(log_tag + "Unsupported Data type");
}
}

Ort::UnownedValue
GetOutputTensor(Ort::KernelContext& context, size_t batch_size,
OVInferRequestPtr infer_request,
@@ -166,7 +140,7 @@ Ort::UnownedValue
GetOutputTensor(Ort::KernelContext& context,
std::string output_name,
std::unordered_map<std::string, int> output_names,
std::shared_ptr<ngraph::Node> node) {
std::shared_ptr<ov::Node> node) {
// Find position of '/' in the output_name
int pos = output_name.find("/");
// Copy the substring from start to pos
@@ -210,25 +184,25 @@ int GetFirstAvailableDevice(GlobalContext& global_context) {
return i;
}

void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::UnownedValue& out_tensor) {
void FillOutputsWithConstantData(std::shared_ptr<ov::Node> node, Ort::UnownedValue& out_tensor) {
switch (node->get_element_type()) {
case ngraph::element::Type_t::f32: {
case ov::element::Type_t::f32: {
FillOutputHelper<float>(out_tensor, node);
break;
}
case ngraph::element::Type_t::boolean: {
case ov::element::Type_t::boolean: {
FillOutputHelper<char>(out_tensor, node);
break;
}
case ngraph::element::Type_t::i32: {
case ov::element::Type_t::i32: {
FillOutputHelper<int32_t>(out_tensor, node);
break;
}
case ngraph::element::Type_t::i64: {
case ov::element::Type_t::i64: {
FillOutputHelper<int64_t>(out_tensor, node);
break;
}
case ngraph::element::Type_t::f16: {
case ov::element::Type_t::f16: {
FillOutputHelper<float>(out_tensor, node);
break;
}
@@ -237,14 +211,22 @@ void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::Unowne
}
}

#if defined(_MSC_VER)
#pragma warning(disable : 4127)
#endif

template <typename T>
void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ngraph::Node> node) {
auto const_node = std::dynamic_pointer_cast<ngraph::op::Constant>(node);
void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ov::Node> node) {
auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>(node);
auto res = const_node->cast_vector<T>();
T* tensor_data = out_tensor.GetTensorMutableData<T>();
std::copy(res.begin(), res.end(), tensor_data);
}

#if defined(_MSC_VER)
#pragma warning(default : 4127)
#endif

void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx,
std::string input_name, Ort::KernelContext& context,
const SubGraphContext& subgraph_context) {
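This file drops the ngraph headers and the InferenceEngine precision converter in favor of OV API 2.0 types (ov::Node, ov::op::v0::Constant, ov::element, the openvino/pass headers). A simplified sketch of the API 2.0 flow that CreateOVModel follows, with error handling and the const_outputs_map bookkeeping omitted:

// Simplified sketch of the OV API 2.0 calls this file now relies on; not the
// actual CreateOVModel implementation.
#include <memory>
#include <string>
#include <openvino/openvino.hpp>
#include <openvino/pass/manager.hpp>
#include <openvino/pass/convert_fp32_to_fp16.hpp>

std::shared_ptr<ov::Model> BuildModel(ov::Core& core,
                                      const std::string& serialized_onnx,
                                      bool fp16) {
  // read_model accepts the serialized protobuf plus an (empty) weights tensor.
  auto model = core.read_model(serialized_onnx, ov::Tensor{});
  if (fp16) {
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::ConvertFP32ToFP16>();
    manager.run_passes(model);
  }
  for (const auto& result : model->get_results()) {
    auto producer = result->input_value(0).get_node_shared_ptr();
    if (auto c = std::dynamic_pointer_cast<ov::op::v0::Constant>(producer)) {
      // Constant-backed outputs can be served directly from the Constant's
      // data (see FillOutputsWithConstantData / FillOutputHelper above).
      (void)c;
    }
  }
  return model;
}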
11 changes: 4 additions & 7 deletions onnxruntime/core/providers/openvino/backend_utils.h
@@ -32,19 +32,16 @@ bool IsCILogEnabled();

int GetFirstAvailableDevice(GlobalContext& global_context);

void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::UnownedValue& out_tensor);
void FillOutputsWithConstantData(std::shared_ptr<ov::Node> node, Ort::UnownedValue& out_tensor);

template <typename T>
void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ngraph::Node> node);
void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ov::Node> node);

Ort::UnownedValue
GetOutputTensor(Ort::KernelContext& context,
std::string output_name,
std::unordered_map<std::string, int> output_names,
std::shared_ptr<ngraph::Node> node);

InferenceEngine::Precision
ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type);
std::shared_ptr<ov::Node> node);

Ort::UnownedValue
GetOutputTensor(Ort::KernelContext& context, size_t batch_size,
@@ -61,7 +58,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,

std::shared_ptr<OVNetwork>
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context,
std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map);
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);

void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
std::ostream& stream, std::string deviceName);
29 changes: 17 additions & 12 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -9,7 +9,6 @@

#include "core/providers/shared_library/provider_api.h"
#include "../backend_utils.h"
#include <ngraph/pass/constant_folding.hpp>
#include "basic_backend.h"
#include "../backend_manager.h"

@@ -45,6 +44,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
}
#endif
try {
std::string dev_prec = global_context.device_type + "_" + global_context_.precision_str;
if (global_context.is_wholly_supported_graph) {
#if defined(IO_BUFFER_ENABLED)
if ((global_context.device_type.find("GPU") != std::string::npos) &&
@@ -57,7 +57,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
} else {
#if defined(OPENVINO_2023_0)
if (subgraph_context.precision != InferenceEngine::Precision::FP16) {
if (!subgraph_context_.has_dynamic_input_shape && dev_prec != "CPU_FP16") {
const std::string model = model_proto.SerializeAsString();
exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name);
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
@@ -72,8 +72,8 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
#endif
#else
#if defined(OPENVINO_2023_0)
if (subgraph_context.precision != InferenceEngine::Precision::FP16 && global_context_.enable_dynamic_shapes == false) {
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
if (!subgraph_context_.has_dynamic_input_shape && dev_prec != "CPU_FP16") {
const std::string model = model_proto.SerializeAsString();
exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name);
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
@@ -111,7 +111,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, nireq));
}

bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ngraph::Node>> & const_outputs_map) {
bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>> & const_outputs_map) {
if (const_outputs_map.size() == subgraph_context_.output_names.size())
subgraph_context_.is_constant = true;
if (subgraph_context_.is_constant) {
@@ -122,17 +122,20 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
}

void BasicBackend::PopulateConfigValue(ov::AnyMap & device_config) {
// Set inference precision if device_type != AUTO
// if (global_context_.device_type.find("GPU_FP16")!= std::string::npos){
// device_config.emplace(ov::hint::inference_precision(global_context_.precision_str));
// }
device_config = {};
// Set inference precision based on device precision for OV backend
if (global_context_.precision_str.find("FP16") != std::string::npos && global_context_.device_type == "GPU") {
device_config.emplace(ov::hint::inference_precision("f16"));
}
if (global_context_.precision_str.find("FP32") != std::string::npos) {
device_config.emplace(ov::hint::inference_precision("f32"));
}
#ifndef NDEBUG
if (openvino_ep::backend_utils::IsDebugEnabled()) {
device_config.emplace(ov::enable_profiling(true));
}
#endif
#if defined(OPENVINO_2023_0)
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
if (global_context_.device_type.find("VPUX") != std::string::npos) {
std::pair<std::string, ov::Any> device_property;
device_property = std::make_pair("VPUX_COMPILER_TYPE", "MLIR");
@@ -160,7 +163,10 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
void BasicBackend::EnableGPUThrottling(ov::AnyMap & device_config) {
if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
std::pair<std::string, ov::Any> device_property;
device_property = std::make_pair("PLUGIN_THROTTLE", "1");
device_config.emplace(ov::device::properties("GPU_CONFIG_KEY", device_property));
// device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
}
}

@@ -190,7 +196,6 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
}
size_t batch_slice_idx = 0;
if (subgraph_context_.has_dynamic_input_shape &&
global_context_.enable_dynamic_shapes == true &&
(global_context_.device_type.find("CPU") != std::string::npos ||
global_context_.device_type.find("GPU") != std::string::npos)) {
auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
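PopulateConfigValue now builds the ov::AnyMap with explicit inference-precision hints, and GPU queue throttling is expressed through ov::device::properties instead of the legacy GPU_CONFIG_KEY macro. A simplified sketch of assembling such a configuration; this is not the EP code, and the compile_model usage at the end is indicative only:

// Simplified sketch of building the device configuration along the lines of
// PopulateConfigValue / EnableGPUThrottling above.
#include <string>
#include <openvino/openvino.hpp>

ov::AnyMap MakeDeviceConfig(const std::string& device_type,
                            const std::string& precision_str,
                            bool enable_profiling) {
  ov::AnyMap config;
  if (device_type == "GPU" && precision_str.find("FP16") != std::string::npos) {
    config.emplace(ov::hint::inference_precision(ov::element::f16));
  }
  if (precision_str.find("FP32") != std::string::npos) {
    config.emplace(ov::hint::inference_precision(ov::element::f32));
  }
  if (enable_profiling) {
    config.emplace(ov::enable_profiling(true));  // mirrors the NDEBUG branch
  }
  return config;
}

// Usage (indicative): compile a model with the assembled configuration.
// ov::Core core;
// auto compiled = core.compile_model(model, device_type,
//                                    MakeDeviceConfig(device_type, "FP16", false));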
4 changes: 2 additions & 2 deletions onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -31,7 +31,7 @@ class BasicBackend : public IBackend {
private:
bool ImportBlob(std::string hw_target, bool vpu_status);
void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&);
bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map);
bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
void PopulateConfigValue(ov::AnyMap& device_config);
void EnableCaching();
void EnableGPUThrottling(ov::AnyMap& device_config);
@@ -48,7 +48,7 @@ class BasicBackend : public IBackend {
mutable std::mutex compute_lock_;
std::shared_ptr<OVNetwork> ie_cnn_network_;
OVExeNetwork exe_network_;
std::map<std::string, std::shared_ptr<ngraph::Node>> const_outputs_map_;
std::map<std::string, std::shared_ptr<ov::Node>> const_outputs_map_;
std::unique_ptr<InferRequestsQueue> inferRequestsQueue_;
#if defined IO_BUFFER_ENABLED
OVRemoteContextPtr remote_context_;
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/openvino/contexts.h
@@ -40,7 +40,7 @@ struct SubGraphContext {
std::vector<int> input_indexes;
std::unordered_map<std::string, int> input_names;
std::unordered_map<std::string, int> output_names;
OVPrecision precision;
std::string precision;
};

} // namespace openvino_ep
@@ -137,6 +137,10 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
openvino_ep::GetCapability obj(graph_viewer,
openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_0");
result = obj.Execute();
#elif defined(OPENVINO_2023_1)
openvino_ep::GetCapability obj(graph_viewer,
openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_1");
result = obj.Execute();
#endif

return result;