From 5db941317c2a4efb17741e418c90e76424860830 Mon Sep 17 00:00:00 2001
From: Andrii Staikov
Date: Mon, 30 Sep 2024 15:18:16 +0200
Subject: [PATCH 1/3] Remove usage of deprecated get_ie_output_name() function

Signed-off-by: Andrii Staikov
---
 modules/nvidia_plugin/src/ops/result.cpp | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/modules/nvidia_plugin/src/ops/result.cpp b/modules/nvidia_plugin/src/ops/result.cpp
index 030f6c9d2..b27ba766c 100644
--- a/modules/nvidia_plugin/src/ops/result.cpp
+++ b/modules/nvidia_plugin/src/ops/result.cpp
@@ -62,21 +62,8 @@ std::optional<std::size_t> ResultOp::GetOutputTensorSubIndex(const ov::Output<ov::Node>& node) {
 }
 
 std::vector<std::string> ResultOp::GetOutputTensorName(const ov::op::v0::Result& node) {
-    std::vector<std::string> outputNames;
-    const auto& input = node.input_value(0);
-    auto name = ov::op::util::get_ie_output_name(input);
-    outputNames.push_back(name);
-
-    auto resultName = node.get_friendly_name();
-
-    // NOTE: New way of getting the fused names for OpenVINO 2.0 API
-    // TODO: When support for old OpenVINO API will be stopped, consider using only this approach.
-    // Also see any issues with Tacatron2 network
-    const auto& fusedResults = ov::getFusedNamesVector(input.get_node()->shared_from_this());
-    outputNames.insert(outputNames.end(), fusedResults.begin(), fusedResults.end());
-
-    return outputNames;
+    return ov::getFusedNamesVector(node.input_value(0).get_node()->shared_from_this());
 }
 
 void ResultOp::Capture(InferenceRequestContext& context,

From 569a0cbe8dc123b2ad9070208169915310b7eeb2 Mon Sep 17 00:00:00 2001
From: Andrii Staikov
Date: Tue, 1 Oct 2024 11:43:00 +0200
Subject: [PATCH 2/3] Fix OPENVINO_DEBUG usage

---
 modules/llama_cpp_plugin/src/compiled_model.cpp | 6 +++---
 modules/llama_cpp_plugin/src/infer_request.cpp  | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/modules/llama_cpp_plugin/src/compiled_model.cpp b/modules/llama_cpp_plugin/src/compiled_model.cpp
index b53b11363..e56416034 100644
--- a/modules/llama_cpp_plugin/src/compiled_model.cpp
+++ b/modules/llama_cpp_plugin/src/compiled_model.cpp
@@ -27,11 +27,11 @@ LlamaCppModel::LlamaCppModel(const std::string& gguf_fname,
     : ICompiledModel(nullptr, plugin),
       m_gguf_fname(gguf_fname),
       m_num_threads(num_threads) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: loading llama model directly from GGUF... " << std::endl;
+    OPENVINO_DEBUG("llama_cpp_plugin: loading llama model directly from GGUF... ")
     llama_model_params mparams = llama_model_default_params();
     mparams.n_gpu_layers = 99;
     m_llama_model_ptr = llama_load_model_from_file(gguf_fname.c_str(), mparams);
-    OPENVINO_DEBUG << "llama_cpp_plugin: llama model loaded successfully from GGUF..." << std::endl;
+    OPENVINO_DEBUG("llama_cpp_plugin: llama model loaded successfully from GGUF...")
     auto input_ids = std::make_shared<ov::op::v0::Parameter>(ov::element::Type_t::i64, ov::PartialShape({-1, -1}));
     auto fake_convert = std::make_shared<ov::op::v0::Convert>(input_ids->output(0), ov::element::Type_t::f32);
@@ -71,7 +71,7 @@ std::shared_ptr<ov::Model> LlamaCppModel::get_runtime_model() const {
 }
 
 void LlamaCppModel::set_property(const ov::AnyMap& properties) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: attempted to set_property (did nothing)";
+    OPENVINO_DEBUG("llama_cpp_plugin: attempted to set_property (did nothing)");
 }
 
 ov::Any LlamaCppModel::get_property(const std::string& name) const {
diff --git a/modules/llama_cpp_plugin/src/infer_request.cpp b/modules/llama_cpp_plugin/src/infer_request.cpp
index 3eefd56d9..de7c47471 100644
--- a/modules/llama_cpp_plugin/src/infer_request.cpp
+++ b/modules/llama_cpp_plugin/src/infer_request.cpp
@@ -28,7 +28,7 @@ void allocate_tensor_impl(ov::SoPtr<ov::ITensor>& tensor,
 
 LlamaCppSyncInferRequest::LlamaCppSyncInferRequest(const std::shared_ptr<const LlamaCppModel>& compiled_model,
                                                    size_t num_threads)
     : ov::ISyncInferRequest(compiled_model) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: infer request ctor called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: infer request ctor called");
     llama_context_params cparams = llama_context_default_params();
     cparams.n_threads = num_threads ? num_threads : std::thread::hardware_concurrency();
     cparams.n_ctx = 0;  // this means that the actual n_ctx will be taken equal to the model's train-time value
@@ -51,7 +51,7 @@ LlamaCppSyncInferRequest::LlamaCppSyncInferRequest(const std::shared_ptr<const LlamaCppModel>& compiled_model,
 
 void LlamaCppSyncInferRequest::set_tensors_impl(const ov::Output<const ov::Node> port,
                                                 const std::vector<ov::SoPtr<ov::ITensor>>& tensors) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: set_tensors_impl called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: set_tensors_impl called");
 }
 
 void llama_batch_add_reimpl(struct llama_batch& batch,
@@ -131,12 +131,12 @@ void LlamaCppSyncInferRequest::infer() {
     llama_batch_free(batch);
 };
 
 std::vector<ov::ProfilingInfo> LlamaCppSyncInferRequest::get_profiling_info() const {
-    OPENVINO_DEBUG << "llama_cpp_plugin: get_profiling_info() called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: get_profiling_info() called");
     return std::vector<ov::ProfilingInfo>{};
 };
 
 std::vector<ov::SoPtr<ov::IVariableState>> LlamaCppSyncInferRequest::query_state() const {
-    OPENVINO_DEBUG << "llama_cpp_plugin: query_state() called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: query_state() called");
     return {std::static_pointer_cast<ov::IVariableState>(std::make_shared<LlamaCppState>(m_llama_ctx))};
 }

From 85daf65d050382aebcf81250228f6e65b2d2802b Mon Sep 17 00:00:00 2001
From: Andrii Staikov
Date: Tue, 1 Oct 2024 11:53:41 +0200
Subject: [PATCH 3/3] Fix missing semicolons

---
 modules/llama_cpp_plugin/src/compiled_model.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/llama_cpp_plugin/src/compiled_model.cpp b/modules/llama_cpp_plugin/src/compiled_model.cpp
index e56416034..c3db1d6cf 100644
--- a/modules/llama_cpp_plugin/src/compiled_model.cpp
+++ b/modules/llama_cpp_plugin/src/compiled_model.cpp
@@ -27,11 +27,11 @@ LlamaCppModel::LlamaCppModel(const std::string& gguf_fname,
     : ICompiledModel(nullptr, plugin),
       m_gguf_fname(gguf_fname),
      m_num_threads(num_threads) {
-    OPENVINO_DEBUG("llama_cpp_plugin: loading llama model directly from GGUF... ")
+    OPENVINO_DEBUG("llama_cpp_plugin: loading llama model directly from GGUF... ");
"); llama_model_params mparams = llama_model_default_params(); mparams.n_gpu_layers = 99; m_llama_model_ptr = llama_load_model_from_file(gguf_fname.c_str(), mparams); - OPENVINO_DEBUG("llama_cpp_plugin: llama model loaded successfully from GGUF...") + OPENVINO_DEBUG("llama_cpp_plugin: llama model loaded successfully from GGUF..."); auto input_ids = std::make_shared(ov::element::Type_t::i64, ov::PartialShape({-1, -1})); auto fake_convert = std::make_shared(input_ids->output(0), ov::element::Type_t::f32);