From 5db941317c2a4efb17741e418c90e76424860830 Mon Sep 17 00:00:00 2001
From: Andrii Staikov
Date: Mon, 30 Sep 2024 15:18:16 +0200
Subject: [PATCH 1/3] Remove usage of deprecated get_ie_output_name() function

Signed-off-by: Andrii Staikov
---
 modules/nvidia_plugin/src/ops/result.cpp | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/modules/nvidia_plugin/src/ops/result.cpp b/modules/nvidia_plugin/src/ops/result.cpp
index 030f6c9d2..b27ba766c 100644
--- a/modules/nvidia_plugin/src/ops/result.cpp
+++ b/modules/nvidia_plugin/src/ops/result.cpp
@@ -62,21 +62,8 @@ std::optional<std::size_t> ResultOp::GetOutputTensorSubIndex(const ov::Output<ov::Node>& node) {
 }
 
 std::vector<std::string> ResultOp::GetOutputTensorName(const ov::op::v0::Result& node) {
-    std::vector<std::string> outputNames;
-    const auto& input = node.input_value(0);
-    auto name = ov::op::util::get_ie_output_name(input);
-    outputNames.push_back(name);
-
-    auto resultName = node.get_friendly_name();
-
-    // NOTE: New way of getting the fused names for OpenVINO 2.0 API
-    // TODO: When support for old OpenVINO API will be stopped, consider using only this approach.
-    // Also see any issues with Tacatron2 network
-    const auto& fusedResults = ov::getFusedNamesVector(input.get_node()->shared_from_this());
-    outputNames.insert(outputNames.end(), fusedResults.begin(), fusedResults.end());
-
-    return outputNames;
+    return ov::getFusedNamesVector(node.input_value(0).get_node()->shared_from_this());
 }
 
 void ResultOp::Capture(InferenceRequestContext& context,

From 569a0cbe8dc123b2ad9070208169915310b7eeb2 Mon Sep 17 00:00:00 2001
From: Andrii Staikov
Date: Tue, 1 Oct 2024 11:43:00 +0200
Subject: [PATCH 2/3] Fix OPENVINO_DEBUG usage

---
 modules/llama_cpp_plugin/src/compiled_model.cpp | 6 +++---
 modules/llama_cpp_plugin/src/infer_request.cpp  | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/modules/llama_cpp_plugin/src/compiled_model.cpp b/modules/llama_cpp_plugin/src/compiled_model.cpp
index b53b11363..e56416034 100644
--- a/modules/llama_cpp_plugin/src/compiled_model.cpp
+++ b/modules/llama_cpp_plugin/src/compiled_model.cpp
@@ -27,11 +27,11 @@ LlamaCppModel::LlamaCppModel(const std::string& gguf_fname,
     : ICompiledModel(nullptr, plugin),
       m_gguf_fname(gguf_fname),
       m_num_threads(num_threads) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: loading llama model directly from GGUF... " << std::endl;
+    OPENVINO_DEBUG("llama_cpp_plugin: loading llama model directly from GGUF... ")
     llama_model_params mparams = llama_model_default_params();
     mparams.n_gpu_layers = 99;
     m_llama_model_ptr = llama_load_model_from_file(gguf_fname.c_str(), mparams);
-    OPENVINO_DEBUG << "llama_cpp_plugin: llama model loaded successfully from GGUF..." << std::endl;
+    OPENVINO_DEBUG("llama_cpp_plugin: llama model loaded successfully from GGUF...")
     auto input_ids = std::make_shared<ov::op::v0::Parameter>(ov::element::Type_t::i64, ov::PartialShape({-1, -1}));
     auto fake_convert = std::make_shared<ov::op::v0::Convert>(input_ids->output(0), ov::element::Type_t::f32);
@@ -71,7 +71,7 @@ std::shared_ptr<ov::Model> LlamaCppModel::get_runtime_model() const {
 }
 
 void LlamaCppModel::set_property(const ov::AnyMap& properties) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: attempted to set_property (did nothing)";
+    OPENVINO_DEBUG("llama_cpp_plugin: attempted to set_property (did nothing)");
 }
 
 ov::Any LlamaCppModel::get_property(const std::string& name) const {
diff --git a/modules/llama_cpp_plugin/src/infer_request.cpp b/modules/llama_cpp_plugin/src/infer_request.cpp
index 3eefd56d9..de7c47471 100644
--- a/modules/llama_cpp_plugin/src/infer_request.cpp
+++ b/modules/llama_cpp_plugin/src/infer_request.cpp
@@ -28,7 +28,7 @@ void allocate_tensor_impl(ov::SoPtr<ov::ITensor>& tensor,
 
 LlamaCppSyncInferRequest::LlamaCppSyncInferRequest(const std::shared_ptr<const LlamaCppModel>& compiled_model,
                                                    size_t num_threads)
     : ov::ISyncInferRequest(compiled_model) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: infer request ctor called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: infer request ctor called");
     llama_context_params cparams = llama_context_default_params();
     cparams.n_threads = num_threads ? num_threads : std::thread::hardware_concurrency();
     cparams.n_ctx = 0;  // this means that the actual n_ctx will be taken equal to the model's train-time value
@@ -51,7 +51,7 @@ LlamaCppSyncInferRequest::LlamaCppSyncInferRequest(const std::shared_ptr<const LlamaCppModel>& compiled_model,
 
 void LlamaCppSyncInferRequest::set_tensors_impl(const ov::Output<const ov::Node> port,
                                                 const std::vector<ov::SoPtr<ov::ITensor>>& tensors) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: set_tensors_impl called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: set_tensors_impl called");
 }
 
 void llama_batch_add_reimpl(struct llama_batch& batch,
@@ -131,12 +131,12 @@ void LlamaCppSyncInferRequest::infer() {
     llama_batch_free(batch);
 };
 
 std::vector<ov::ProfilingInfo> LlamaCppSyncInferRequest::get_profiling_info() const {
-    OPENVINO_DEBUG << "llama_cpp_plugin: get_profiling_info() called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: get_profiling_info() called");
     return std::vector<ov::ProfilingInfo>{};
 };
 
 std::vector<ov::SoPtr<ov::IVariableState>> LlamaCppSyncInferRequest::query_state() const {
-    OPENVINO_DEBUG << "llama_cpp_plugin: query_state() called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: query_state() called");
     return {std::static_pointer_cast<ov::IVariableState>(std::make_shared<LlamaCppState>(m_llama_ctx))};
 }

From 85daf65d050382aebcf81250228f6e65b2d2802b Mon Sep 17 00:00:00 2001
From: Andrii Staikov
Date: Tue, 1 Oct 2024 11:53:41 +0200
Subject: [PATCH 3/3] Fix missing semicolons

---
 modules/llama_cpp_plugin/src/compiled_model.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/llama_cpp_plugin/src/compiled_model.cpp b/modules/llama_cpp_plugin/src/compiled_model.cpp
index e56416034..c3db1d6cf 100644
--- a/modules/llama_cpp_plugin/src/compiled_model.cpp
+++ b/modules/llama_cpp_plugin/src/compiled_model.cpp
@@ -27,11 +27,11 @@ LlamaCppModel::LlamaCppModel(const std::string& gguf_fname,
     : ICompiledModel(nullptr, plugin),
       m_gguf_fname(gguf_fname),
      m_num_threads(num_threads) {
-    OPENVINO_DEBUG("llama_cpp_plugin: loading llama model directly from GGUF... ")
+    OPENVINO_DEBUG("llama_cpp_plugin: loading llama model directly from GGUF... ");
"); llama_model_params mparams = llama_model_default_params(); mparams.n_gpu_layers = 99; m_llama_model_ptr = llama_load_model_from_file(gguf_fname.c_str(), mparams); - OPENVINO_DEBUG("llama_cpp_plugin: llama model loaded successfully from GGUF...") + OPENVINO_DEBUG("llama_cpp_plugin: llama model loaded successfully from GGUF..."); auto input_ids = std::make_shared(ov::element::Type_t::i64, ov::PartialShape({-1, -1})); auto fake_convert = std::make_shared(input_ids->output(0), ov::element::Type_t::f32);