Ort v1.15.0 ov changes customer a #314
base: ort_v1.15.0_ov_changes
@@ -8,8 +8,8 @@
 #include <fstream>

 #include "ov_interface.h"
-#include <ngraph/pass/convert_fp32_to_fp16.hpp>
-#include <ngraph/pass/constant_folding.hpp>
+#include "openvino/pass/convert_fp32_to_fp16.hpp"
+#include "openvino/pass/constant_folding.hpp"
 #include "core/providers/shared_library/provider_api.h"
 #include "backend_utils.h"

@@ -50,14 +50,14 @@ struct static_cast_int64 {
 std::shared_ptr<OVNetwork>
 CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context,
 const SubGraphContext& subgraph_context,
-std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
+std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
 if (IsCILogEnabled()) {
 std::cout << "CreateNgraphFunc" << std::endl;
 }
 const std::string model = model_proto.SerializeAsString();
 try {
 auto cnn_network = global_context.ie_core.ReadModel(model);
-if ((subgraph_context.precision == InferenceEngine::Precision::FP16) &&
+if ((subgraph_context.precision == "FP16") &&
 (global_context.device_type.find("VPUX") == std::string::npos)) {
 // FP16 transformations
 ov::pass::ConvertFP32ToFP16 pass_obj;

@@ -88,7 +88,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
 size_t index = results.size() - 1;

 for (auto it = results.rbegin(); it != results.rend(); ++it) {
-if (auto const_node = std::dynamic_pointer_cast<ngraph::op::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
+if (auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
 const_outputs_map[(*it)->get_friendly_name()] = const_node;
 results.erase(results.begin() + index);
 }

Review comment: Are we using OV API 1.0 or 2.0?
Reply: These are with API 2.0, as we support only the latest three releases of OV.

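Note: the hunk above switches the constant-output check to the API 2.0 Constant class. As a point of reference, here is a minimal standalone sketch of that pattern (the function name and the free-standing ov::Model are illustrative assumptions, not code from this PR):

#include <map>
#include <memory>
#include <string>

#include "openvino/core/model.hpp"
#include "openvino/op/constant.hpp"

// Illustrative sketch: gather outputs of an ov::Model whose producer is a folded
// constant, mirroring the check introduced in the hunk above.
std::map<std::string, std::shared_ptr<ov::Node>> CollectConstantOutputs(
    const std::shared_ptr<ov::Model>& model) {
  std::map<std::string, std::shared_ptr<ov::Node>> const_outputs;
  for (const auto& result : model->get_results()) {
    auto producer = result->input_value(0).get_node_shared_ptr();
    if (auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>(producer)) {
      const_outputs[result->get_friendly_name()] = const_node;
    }
  }
  return const_outputs;
}

The dynamic_pointer_cast returns null for non-constant producers, so only genuinely folded outputs land in the map.
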
@@ -101,7 +101,6 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
 std::string name = cnn_network->get_friendly_name();
 ov::pass::Serialize serializer(name + ".xml", name + ".bin");
 serializer.run_on_model(cnn_network);
-ngraph::plot_graph(cnn_network, name + "_executable" + ".dot");
 }
 #endif
 #endif

@@ -111,31 +110,6 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
 }
 }

-InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type) {
-ONNX_NAMESPACE::DataType type_string = ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(onnx_type);
-if (*type_string == "float" || *type_string == "tensor(float)") {
-return InferenceEngine::Precision::FP32;
-} else if (*type_string == "float16" || *type_string == "tensor(float16)") {
-return InferenceEngine::Precision::FP16;
-} else if (*type_string == "int32" || *type_string == "tensor(int32)") {
-return InferenceEngine::Precision::I32;
-} else if (*type_string == "int16" || *type_string == "tensor(int16)") {
-return InferenceEngine::Precision::I16;
-} else if (*type_string == "int8" || *type_string == "tensor(int8)") {
-return InferenceEngine::Precision::I8;
-} else if (*type_string == "uint16" || *type_string == "tensor(uint16)") {
-return InferenceEngine::Precision::U16;
-} else if (*type_string == "uint8" || *type_string == "tensor(uint8)") {
-return InferenceEngine::Precision::U8;
-} else if (*type_string == "bool" || *type_string == "tensor(bool)") {
-return InferenceEngine::Precision::U8;
-} else if (*type_string == "int64" || *type_string == "tensor(int64)") {
-return InferenceEngine::Precision::I32;
-} else {
-throw std::string(log_tag + "Unsupported Data type");
-}
-}

 Ort::UnownedValue
 GetOutputTensor(Ort::KernelContext& context, size_t batch_size,
 OVInferRequestPtr infer_request,

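Note: the deleted ConvertPrecisionONNXToOpenVINO helper mapped ONNX type strings to the API 1.0 InferenceEngine::Precision enum. If an equivalent were ever needed under API 2.0, the natural target would be ov::element::Type; a hedged sketch of what such a mapping could look like (the helper name is hypothetical and is not part of this PR):

#include <stdexcept>
#include <string>

#include "openvino/core/type/element_type.hpp"

// Hypothetical API 2.0 counterpart of the deleted helper: map an ONNX tensor
// type string to ov::element::Type. Not part of this PR; shown for illustration.
inline ov::element::Type ConvertONNXTypeToOVType(const std::string& type_string) {
  if (type_string == "tensor(float)") return ov::element::f32;
  if (type_string == "tensor(float16)") return ov::element::f16;
  if (type_string == "tensor(int32)") return ov::element::i32;
  if (type_string == "tensor(int16)") return ov::element::i16;
  if (type_string == "tensor(int8)") return ov::element::i8;
  if (type_string == "tensor(uint16)") return ov::element::u16;
  if (type_string == "tensor(uint8)") return ov::element::u8;
  if (type_string == "tensor(bool)") return ov::element::boolean;
  if (type_string == "tensor(int64)") return ov::element::i64;
  throw std::runtime_error("Unsupported data type: " + type_string);
}

Unlike the removed helper, this sketch keeps int64 and bool at their native element types rather than narrowing them to I32 and U8.
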
@@ -166,7 +140,7 @@ Ort::UnownedValue
 GetOutputTensor(Ort::KernelContext& context,
 std::string output_name,
 std::unordered_map<std::string, int> output_names,
-std::shared_ptr<ngraph::Node> node) {
+std::shared_ptr<ov::Node> node) {
 // Find position of '/' in the output_name
 int pos = output_name.find("/");
 // Copy the substring from start to pos

@@ -210,25 +184,25 @@ int GetFirstAvailableDevice(GlobalContext& global_context) {
 return i;
 }

-void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::UnownedValue& out_tensor) {
+void FillOutputsWithConstantData(std::shared_ptr<ov::Node> node, Ort::UnownedValue& out_tensor) {
 switch (node->get_element_type()) {
-case ngraph::element::Type_t::f32: {
+case ov::element::Type_t::f32: {
 FillOutputHelper<float>(out_tensor, node);
 break;
 }
-case ngraph::element::Type_t::boolean: {
+case ov::element::Type_t::boolean: {
 FillOutputHelper<char>(out_tensor, node);
 break;
 }
-case ngraph::element::Type_t::i32: {
+case ov::element::Type_t::i32: {
 FillOutputHelper<int32_t>(out_tensor, node);
 break;
 }
-case ngraph::element::Type_t::i64: {
+case ov::element::Type_t::i64: {
 FillOutputHelper<int64_t>(out_tensor, node);
 break;
 }
-case ngraph::element::Type_t::f16: {
+case ov::element::Type_t::f16: {
 FillOutputHelper<float>(out_tensor, node);
 break;
 }

@@ -237,14 +211,22 @@ void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::UnownedValue& out_tensor) {
 }
 }

+#if defined(_MSC_VER)
+#pragma warning(disable : 4127)
+#endif
+
 template <typename T>
-void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ngraph::Node> node) {
-auto const_node = std::dynamic_pointer_cast<ngraph::op::Constant>(node);
+void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ov::Node> node) {
+auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>(node);
 auto res = const_node->cast_vector<T>();
 T* tensor_data = out_tensor.GetTensorMutableData<T>();
 std::copy(res.begin(), res.end(), tensor_data);
 }

+#if defined(_MSC_VER)
+#pragma warning(default : 4127)
+#endif
+
 void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx,
 std::string input_name, Ort::KernelContext& context,
 const SubGraphContext& subgraph_context) {

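Note: FillOutputHelper relies on ov::op::v0::Constant::cast_vector<T>() to materialize a constant's payload in the element type ORT expects. A small self-contained usage sketch of that call (the values and shape are made up for illustration):

#include <cstdint>
#include <iostream>
#include <vector>

#include "openvino/op/constant.hpp"

int main() {
  // Build an int64 constant with three elements, then read it back as int32,
  // the same conversion style FillOutputHelper<int32_t> performs.
  auto c = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3},
                                        std::vector<int64_t>{1, 2, 3});
  std::vector<int32_t> values = c->cast_vector<int32_t>();
  for (int32_t v : values) {
    std::cout << v << " ";
  }
  std::cout << std::endl;
  return 0;
}
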
(The following hunks are from a second file in this diff.)
@@ -9,7 +9,6 @@

 #include "core/providers/shared_library/provider_api.h"
 #include "../backend_utils.h"
-#include <ngraph/pass/constant_folding.hpp>
 #include "basic_backend.h"
 #include "../backend_manager.h"

@@ -45,6 +44,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
 }
 #endif
 try {
+std::string dev_prec = global_context.device_type + "_" + global_context_.precision_str;
 if (global_context.is_wholly_supported_graph) {
 #if defined(IO_BUFFER_ENABLED)
 if ((global_context.device_type.find("GPU") != std::string::npos) &&

@@ -57,7 +57,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
 LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
 } else {
 #if defined(OPENVINO_2023_0)
-if (subgraph_context.precision != InferenceEngine::Precision::FP16) {
+if (!subgraph_context_.has_dynamic_input_shape && dev_prec!="CPU_FP16") {
 const std::string model = model_proto.SerializeAsString();
 exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name);
 LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";

Review comment: Has a bug been raised with the OV team for CPU FP16 precision?
Reply: Not yet. Shall raise one.

@@ -73,7 +73,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
 #endif
 #else
 #if defined(OPENVINO_2023_0)
-if (subgraph_context.precision != InferenceEngine::Precision::FP16 && global_context_.enable_dynamic_shapes == false) {
+if (!subgraph_context_.has_dynamic_input_shape && dev_prec!="CPU_FP16") {
 const std::string model = model_proto.SerializeAsString();
 exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name);
 LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";

@@ -111,7 +111,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
 inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, nireq));
 }

-bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ngraph::Node>> & const_outputs_map) {
+bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>> & const_outputs_map) {
 if (const_outputs_map.size() == subgraph_context_.output_names.size())
 subgraph_context_.is_constant = true;
 if (subgraph_context_.is_constant) {

@@ -122,11 +122,14 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
 }

 void BasicBackend::PopulateConfigValue(ov::AnyMap & device_config) {
-// Set inference precision if device_type != AUTO
-// if (global_context_.device_type.find("GPU_FP16")!= std::string::npos){
-// device_config.emplace(ov::hint::inference_precision(global_context_.precision_str));
-// }
 device_config = {};
+// Set inference precision based on device precision for OV backend
+if (global_context_.precision_str.find("FP16")!= std::string::npos && global_context_.device_type == "GPU"){
+device_config.emplace(ov::hint::inference_precision("f16"));
+}
+if (global_context_.precision_str.find("FP32")!= std::string::npos){
+device_config.emplace(ov::hint::inference_precision("f32"));
+}
 #ifndef NDEBUG
 if (openvino_ep::backend_utils::IsDebugEnabled()) {
 device_config.emplace(ov::enable_profiling(true));

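Note: the ov::hint::inference_precision entries populated above are ordinary ov::AnyMap properties that the runtime consumes when the model is compiled. A minimal sketch of how such a map is passed to ov::Core::compile_model (the device name and the use of ov::element::f16 rather than the string "f16" are assumptions for illustration):

#include <memory>

#include "openvino/openvino.hpp"

// Illustrative only: compile a model with an explicit inference-precision hint.
ov::CompiledModel CompileWithPrecisionHint(ov::Core& core,
                                           const std::shared_ptr<ov::Model>& model) {
  ov::AnyMap device_config;
  device_config.emplace(ov::hint::inference_precision(ov::element::f16));
  device_config.emplace(ov::enable_profiling(true));  // matches the NDEBUG branch above
  return core.compile_model(model, "GPU", device_config);
}
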
@@ -157,12 +160,15 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
 }
 }

-void BasicBackend::EnableGPUThrottling(ov::AnyMap & device_config) {
-if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
-LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
-device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
-}
+void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) {
+if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
+LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
+std::pair<std::string, ov::Any> device_property;
+device_property = std::make_pair("PLUGIN_THROTTLE", "1");
+device_config.emplace(ov::device::properties("GPU_CONFIG_KEY", device_property));
+// device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
+}
 }

 // Starts an asynchronous inference request for data in slice indexed by batch_slice_idx on
 // an Infer Request indexed by infer_req_idx

Review comment: Remove commented code.

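Note: the rewritten EnableGPUThrottling forwards a plugin-specific key through ov::device::properties instead of indexing the map with GPU_CONFIG_KEY. A hedged sketch of that mechanism (the device name "GPU" used here is an assumption for illustration; the hunk above passes the literal string "GPU_CONFIG_KEY" as the device name):

#include <string>
#include <utility>

#include "openvino/openvino.hpp"

// Illustrative only: nest a plugin-specific option under ov::device::properties,
// the API 2.0 way to scope a setting to one device.
void AddGpuThrottlingOption(ov::AnyMap& device_config) {
  // Wrap the legacy GPU plugin key in a device-scoped property entry.
  std::pair<std::string, ov::Any> throttle{"PLUGIN_THROTTLE", "1"};
  device_config.emplace(ov::device::properties("GPU", throttle));
}
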
@@ -190,7 +196,6 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
 }
 size_t batch_slice_idx = 0;
 if (subgraph_context_.has_dynamic_input_shape &&
-global_context_.enable_dynamic_shapes == true &&
 (global_context_.device_type.find("CPU") != std::string::npos ||
 global_context_.device_type.find("GPU") != std::string::npos)) {
 auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));

Review comment: For NPU we still need the dynamic backend, so this logic may not be suitable going forward.
Reply: This change only removes the check on whether the enable_dynamic_shapes runtime option is provided. Under this logic, a model with a dynamic-shaped input will still be handled by the dynamic backend.
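
Note: the exchange above hinges on whether a subgraph has a dynamic-shaped input. A small sketch of how that check can be expressed with the API 2.0 model interface (this helper is illustrative and is not the provider's actual has_dynamic_input_shape logic):

#include <memory>

#include "openvino/core/model.hpp"

// Illustrative only: report whether any model input carries a dynamic dimension.
bool HasDynamicInputShape(const std::shared_ptr<ov::Model>& model) {
  for (const auto& param : model->get_parameters()) {
    if (param->get_partial_shape().is_dynamic()) {
      return true;
    }
  }
  return false;
}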