Ort v1.15.0 ov changes customer a #314
base: ort_v1.15.0_ov_changes
@@ -8,8 +8,8 @@
 #include <fstream>

 #include "ov_interface.h"
-#include <ngraph/pass/convert_fp32_to_fp16.hpp>
-#include <ngraph/pass/constant_folding.hpp>
+#include "openvino/pass/convert_fp32_to_fp16.hpp"
+#include "openvino/pass/constant_folding.hpp"
 #include "core/providers/shared_library/provider_api.h"
 #include "backend_utils.h"

@@ -50,14 +50,14 @@ struct static_cast_int64 {
 std::shared_ptr<OVNetwork>
 CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context,
 const SubGraphContext& subgraph_context,
-std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
+std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
 if (IsCILogEnabled()) {
 std::cout << "CreateNgraphFunc" << std::endl;
 }
 const std::string model = model_proto.SerializeAsString();
 try {
 auto cnn_network = global_context.ie_core.ReadModel(model);
-if ((subgraph_context.precision == InferenceEngine::Precision::FP16) &&
+if ((subgraph_context.precision == "FP16") &&
 (global_context.device_type.find("VPUX") == std::string::npos)) {
 // FP16 transformations
 ov::pass::ConvertFP32ToFP16 pass_obj;

@@ -88,7 +88,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
 size_t index = results.size() - 1;

 for (auto it = results.rbegin(); it != results.rend(); ++it) {
-if (auto const_node = std::dynamic_pointer_cast<ngraph::op::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
+if (auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
 const_outputs_map[(*it)->get_friendly_name()] = const_node;
 results.erase(results.begin() + index);
 }

Review comment: Are we using OV API 1.0 or 2.0?
Reply: These are with API 2.0, as we support only the latest three releases of OV.

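Note: the hunk above switches the constant-output check to the API 2.0 Constant class. As a point of reference, here is a minimal standalone sketch of that pattern (the function name and the free-standing ov::Model are illustrative assumptions, not code from this PR):

#include <map>
#include <memory>
#include <string>

#include "openvino/core/model.hpp"
#include "openvino/op/constant.hpp"

// Illustrative sketch: gather outputs of an ov::Model whose producer is a folded
// constant, mirroring the check introduced in the hunk above.
std::map<std::string, std::shared_ptr<ov::Node>> CollectConstantOutputs(
    const std::shared_ptr<ov::Model>& model) {
  std::map<std::string, std::shared_ptr<ov::Node>> const_outputs;
  for (const auto& result : model->get_results()) {
    auto producer = result->input_value(0).get_node_shared_ptr();
    if (auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>(producer)) {
      const_outputs[result->get_friendly_name()] = const_node;
    }
  }
  return const_outputs;
}

The dynamic_pointer_cast returns null for non-constant producers, so only genuinely folded outputs land in the map.
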
@@ -101,7 +101,6 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
 std::string name = cnn_network->get_friendly_name();
 ov::pass::Serialize serializer(name + ".xml", name + ".bin");
 serializer.run_on_model(cnn_network);
-ngraph::plot_graph(cnn_network, name + "_executable" + ".dot");
 }
 #endif
 #endif

@@ -111,31 +110,6 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
 }
 }

-InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type) {
-ONNX_NAMESPACE::DataType type_string = ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(onnx_type);
-if (*type_string == "float" || *type_string == "tensor(float)") {
-return InferenceEngine::Precision::FP32;
-} else if (*type_string == "float16" || *type_string == "tensor(float16)") {
-return InferenceEngine::Precision::FP16;
-} else if (*type_string == "int32" || *type_string == "tensor(int32)") {
-return InferenceEngine::Precision::I32;
-} else if (*type_string == "int16" || *type_string == "tensor(int16)") {
-return InferenceEngine::Precision::I16;
-} else if (*type_string == "int8" || *type_string == "tensor(int8)") {
-return InferenceEngine::Precision::I8;
-} else if (*type_string == "uint16" || *type_string == "tensor(uint16)") {
-return InferenceEngine::Precision::U16;
-} else if (*type_string == "uint8" || *type_string == "tensor(uint8)") {
-return InferenceEngine::Precision::U8;
-} else if (*type_string == "bool" || *type_string == "tensor(bool)") {
-return InferenceEngine::Precision::U8;
-} else if (*type_string == "int64" || *type_string == "tensor(int64)") {
-return InferenceEngine::Precision::I32;
-} else {
-throw std::string(log_tag + "Unsupported Data type");
-}
-}

 Ort::UnownedValue
 GetOutputTensor(Ort::KernelContext& context, size_t batch_size,
 OVInferRequestPtr infer_request,

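Note: the deleted ConvertPrecisionONNXToOpenVINO helper mapped ONNX type strings to the API 1.0 InferenceEngine::Precision enum. If an equivalent were ever needed under API 2.0, the natural target would be ov::element::Type; a hedged sketch of what such a mapping could look like (the helper name is hypothetical and is not part of this PR):

#include <stdexcept>
#include <string>

#include "openvino/core/type/element_type.hpp"

// Hypothetical API 2.0 counterpart of the deleted helper: map an ONNX tensor
// type string to ov::element::Type. Not part of this PR; shown for illustration.
inline ov::element::Type ConvertONNXTypeToOVType(const std::string& type_string) {
  if (type_string == "tensor(float)") return ov::element::f32;
  if (type_string == "tensor(float16)") return ov::element::f16;
  if (type_string == "tensor(int32)") return ov::element::i32;
  if (type_string == "tensor(int16)") return ov::element::i16;
  if (type_string == "tensor(int8)") return ov::element::i8;
  if (type_string == "tensor(uint16)") return ov::element::u16;
  if (type_string == "tensor(uint8)") return ov::element::u8;
  if (type_string == "tensor(bool)") return ov::element::boolean;
  if (type_string == "tensor(int64)") return ov::element::i64;
  throw std::runtime_error("Unsupported data type: " + type_string);
}

Unlike the removed helper, this sketch keeps int64 and bool at their native element types rather than narrowing them to I32 and U8.
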
@@ -166,7 +140,7 @@ Ort::UnownedValue
 GetOutputTensor(Ort::KernelContext& context,
 std::string output_name,
 std::unordered_map<std::string, int> output_names,
-std::shared_ptr<ngraph::Node> node) {
+std::shared_ptr<ov::Node> node) {
 // Find position of '/' in the output_name
 int pos = output_name.find("/");
 // Copy the substring from start to pos

@@ -210,25 +184,25 @@ int GetFirstAvailableDevice(GlobalContext& global_context) {
 return i;
 }

-void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::UnownedValue& out_tensor) {
+void FillOutputsWithConstantData(std::shared_ptr<ov::Node> node, Ort::UnownedValue& out_tensor) {
 switch (node->get_element_type()) {
-case ngraph::element::Type_t::f32: {
+case ov::element::Type_t::f32: {
 FillOutputHelper<float>(out_tensor, node);
 break;
 }
-case ngraph::element::Type_t::boolean: {
+case ov::element::Type_t::boolean: {
 FillOutputHelper<char>(out_tensor, node);
 break;
 }
-case ngraph::element::Type_t::i32: {
+case ov::element::Type_t::i32: {
 FillOutputHelper<int32_t>(out_tensor, node);
 break;
 }
-case ngraph::element::Type_t::i64: {
+case ov::element::Type_t::i64: {
 FillOutputHelper<int64_t>(out_tensor, node);
 break;
 }
-case ngraph::element::Type_t::f16: {
+case ov::element::Type_t::f16: {
 FillOutputHelper<float>(out_tensor, node);
 break;
 }

@@ -237,14 +211,22 @@ void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::UnownedValue& out_tensor) {
 }
 }

+#if defined(_MSC_VER)
+#pragma warning(disable : 4127)
+#endif
+
 template <typename T>
-void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ngraph::Node> node) {
-auto const_node = std::dynamic_pointer_cast<ngraph::op::Constant>(node);
+void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ov::Node> node) {
+auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>(node);
 auto res = const_node->cast_vector<T>();
 T* tensor_data = out_tensor.GetTensorMutableData<T>();
 std::copy(res.begin(), res.end(), tensor_data);
 }

+#if defined(_MSC_VER)
+#pragma warning(default : 4127)
+#endif
+
 void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx,
 std::string input_name, Ort::KernelContext& context,
 const SubGraphContext& subgraph_context) {

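Note: FillOutputHelper relies on ov::op::v0::Constant::cast_vector<T>() to materialize a constant's payload in the element type ORT expects. A small self-contained usage sketch of that call (the values and shape are made up for illustration):

#include <cstdint>
#include <iostream>
#include <vector>

#include "openvino/op/constant.hpp"

int main() {
  // Build an int64 constant with three elements, then read it back as int32,
  // the same conversion style FillOutputHelper<int32_t> performs.
  auto c = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3},
                                        std::vector<int64_t>{1, 2, 3});
  std::vector<int32_t> values = c->cast_vector<int32_t>();
  for (int32_t v : values) {
    std::cout << v << " ";
  }
  std::cout << std::endl;
  return 0;
}
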
(The following hunks are from a second file in this diff.)
@@ -9,7 +9,6 @@

 #include "core/providers/shared_library/provider_api.h"
 #include "../backend_utils.h"
-#include <ngraph/pass/constant_folding.hpp>
 #include "basic_backend.h"
 #include "../backend_manager.h"

@@ -45,6 +44,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
 }
 #endif
 try {
+std::string dev_prec = global_context.device_type + "_" + global_context_.precision_str;
 if (global_context.is_wholly_supported_graph) {
 #if defined(IO_BUFFER_ENABLED)
 if ((global_context.device_type.find("GPU") != std::string::npos) &&

@@ -57,7 +57,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
 LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
 } else {
 #if defined(OPENVINO_2023_0)
-if (subgraph_context.precision != InferenceEngine::Precision::FP16) {
+if (!subgraph_context_.has_dynamic_input_shape && dev_prec!="CPU_FP16") {
 const std::string model = model_proto.SerializeAsString();
 exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name);
 LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";

Review comment: Has a bug been raised with the OV team for CPU FP16 precision?
Reply: Not yet. Shall raise one.

@@ -73,7 +73,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
 #endif
 #else
 #if defined(OPENVINO_2023_0)
-if (subgraph_context.precision != InferenceEngine::Precision::FP16 && global_context_.enable_dynamic_shapes == false) {
+if (!subgraph_context_.has_dynamic_input_shape && dev_prec!="CPU_FP16") {
 const std::string model = model_proto.SerializeAsString();
 exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name);
 LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";

@@ -111,7 +111,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
 inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, nireq));
 }

-bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ngraph::Node>> & const_outputs_map) {
+bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>> & const_outputs_map) {
 if (const_outputs_map.size() == subgraph_context_.output_names.size())
 subgraph_context_.is_constant = true;
 if (subgraph_context_.is_constant) {

@@ -122,11 +122,14 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
 }

 void BasicBackend::PopulateConfigValue(ov::AnyMap & device_config) {
-// Set inference precision if device_type != AUTO
-// if (global_context_.device_type.find("GPU_FP16")!= std::string::npos){
-// device_config.emplace(ov::hint::inference_precision(global_context_.precision_str));
-// }
 device_config = {};
+// Set inference precision based on device precision for OV backend
+if (global_context_.precision_str.find("FP16")!= std::string::npos && global_context_.device_type == "GPU"){
+device_config.emplace(ov::hint::inference_precision("f16"));
+}
+if (global_context_.precision_str.find("FP32")!= std::string::npos){
+device_config.emplace(ov::hint::inference_precision("f32"));
+}
 #ifndef NDEBUG
 if (openvino_ep::backend_utils::IsDebugEnabled()) {
 device_config.emplace(ov::enable_profiling(true));

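Note: the ov::hint::inference_precision entries populated above are ordinary ov::AnyMap properties that the runtime consumes when the model is compiled. A minimal sketch of how such a map is passed to ov::Core::compile_model (the device name and the use of ov::element::f16 rather than the string "f16" are assumptions for illustration):

#include <memory>

#include "openvino/openvino.hpp"

// Illustrative only: compile a model with an explicit inference-precision hint.
ov::CompiledModel CompileWithPrecisionHint(ov::Core& core,
                                           const std::shared_ptr<ov::Model>& model) {
  ov::AnyMap device_config;
  device_config.emplace(ov::hint::inference_precision(ov::element::f16));
  device_config.emplace(ov::enable_profiling(true));  // matches the NDEBUG branch above
  return core.compile_model(model, "GPU", device_config);
}
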
@@ -157,12 +160,15 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
 }
 }

-void BasicBackend::EnableGPUThrottling(ov::AnyMap & device_config) {
-if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
-LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
-device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
-}
+void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) {
+if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
+LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
+std::pair<std::string, ov::Any> device_property;
+device_property = std::make_pair("PLUGIN_THROTTLE", "1");
+device_config.emplace(ov::device::properties("GPU_CONFIG_KEY", device_property));
+// device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
+}
 }

 // Starts an asynchronous inference request for data in slice indexed by batch_slice_idx on
 // an Infer Request indexed by infer_req_idx

Review comment: Remove commented code.

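Note: the rewritten EnableGPUThrottling forwards a plugin-specific key through ov::device::properties instead of indexing the map with GPU_CONFIG_KEY. A hedged sketch of that mechanism (the device name "GPU" used here is an assumption for illustration; the hunk above passes the literal string "GPU_CONFIG_KEY" as the device name):

#include <string>
#include <utility>

#include "openvino/openvino.hpp"

// Illustrative only: nest a plugin-specific option under ov::device::properties,
// the API 2.0 way to scope a setting to one device.
void AddGpuThrottlingOption(ov::AnyMap& device_config) {
  // Wrap the legacy GPU plugin key in a device-scoped property entry.
  std::pair<std::string, ov::Any> throttle{"PLUGIN_THROTTLE", "1"};
  device_config.emplace(ov::device::properties("GPU", throttle));
}
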
@@ -190,7 +196,6 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
 }
 size_t batch_slice_idx = 0;
 if (subgraph_context_.has_dynamic_input_shape &&
-global_context_.enable_dynamic_shapes == true &&
 (global_context_.device_type.find("CPU") != std::string::npos ||
 global_context_.device_type.find("GPU") != std::string::npos)) {
 auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));

Review comment: For NPU we still need the dynamic backend, so this logic may not be suitable going forward.
Reply: This change only removes the check on whether the enable_dynamic_shapes runtime option is provided. Under this logic, a model with a dynamic-shaped input will still be handled by the dynamic backend.
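
Note: the exchange above hinges on whether a subgraph has a dynamic-shaped input. A small sketch of how that check can be expressed with the API 2.0 model interface (this helper is illustrative and is not the provider's actual has_dynamic_input_shape logic):

#include <memory>

#include "openvino/core/model.hpp"

// Illustrative only: report whether any model input carries a dynamic dimension.
bool HasDynamicInputShape(const std::shared_ptr<ov::Model>& model) {
  for (const auto& param : model->get_parameters()) {
    if (param->get_partial_shape().is_dynamic()) {
      return true;
    }
  }
  return false;
}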