From 2ae943e059a1a6c173680b6c67c419fd95a95d6f Mon Sep 17 00:00:00 2001
From: Alexey Smirnov
Date: Wed, 2 Oct 2024 18:55:04 +0000
Subject: [PATCH] Address review comments part 2

---
 .../src/plugin/npuw/compiled_model.cpp        |   6 +-
 .../src/plugin/npuw/compiled_model.hpp        |   2 +-
 .../intel_npu/src/plugin/npuw/lazy_tensor.cpp | 275 ++++++++++++------
 .../intel_npu/src/plugin/npuw/lazy_tensor.hpp |   9 +-
 .../plugin/npuw/partitioning/partitioning.cpp |  49 ++--
 .../plugin/npuw/partitioning/partitioning.hpp |   2 +-
 .../npuw/partitioning/patterns/dcoff.cpp      |  12 +-
 7 files changed, 225 insertions(+), 130 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
index 73345113e89b2d..21ad4a32bd6fb5 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -288,7 +288,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
         m_compiled_submodels[id].host_gather = subgraph._host_gather;
         m_compiled_submodels[id].param_base = fcn_template._param_offset;
         m_compiled_submodels[id].closure = subgraph._closure;
-        m_compiled_submodels[id].transformations = subgraph._transformations;
+        m_compiled_submodels[id].lazy_closure = subgraph._lazy_closure;
         m_compiled_submodels[id].scales = subgraph._scales;
         m_compiled_submodels[id].zerops = subgraph._zerops;
         m_compiled_submodels[id].is_remote.resize(subgraph._closure.size(), false);
@@ -426,8 +426,8 @@ void ov::npuw::CompiledModel::finalize_weights_bank() {
         m_compiled_submodels[idx].closure.resize(0);
         m_compiled_submodels[idx].is_remote.resize(0);
 
-        for (std::size_t tidx = 0; tidx < comp_model_desc.transformations.size(); ++tidx) {
-            const auto& lt = m_compiled_submodels[idx].transformations[tidx];
+        for (std::size_t tidx = 0; tidx < comp_model_desc.lazy_closure.size(); ++tidx) {
+            const auto& lt = m_compiled_submodels[idx].lazy_closure[tidx];
             m_compiled_submodels[idx].closure.push_back(m_weights_bank->get(lt, *func_desc.device_it));
             // FIXME: should is_remote be set unconditionally?
             m_compiled_submodels[idx].is_remote.push_back(true);
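The `m_weights_bank->get(lt, device)` call above is where the LazyTensor machinery pays off: function calls that reference the same weight with the same transformation chain should resolve to a single allocated tensor. Below is a minimal sketch of such a bank, assuming this patch's `LazyTensor` API; the `Bank` class itself, its locking, and the (unused here) device handling are illustrative assumptions, not the plugin's actual implementation.

    #include <mutex>
    #include <string>
    #include <unordered_map>

    // Sketch: evaluate each unique LazyTensor once and reuse the result for
    // every identical request. LazyTensor::Hash and operator== (defined in
    // this patch) make LazyTensor usable as an unordered_map key.
    class Bank {
    public:
        ov::Tensor get(const LazyTensor& lt, const std::string& device) {
            std::lock_guard<std::mutex> guard(m_mutex);
            auto iter = m_cache.find(lt);
            if (iter == m_cache.end()) {
                // First request: run the recorded transformation chain.
                // Allocation on `device` is omitted in this sketch.
                iter = m_cache.emplace(lt, lt.eval()).first;
            }
            return iter->second;
        }

    private:
        std::mutex m_mutex;
        std::unordered_map<LazyTensor, ov::Tensor, LazyTensor::Hash> m_cache;
    };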
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
index d565abcf4444c4..3ec92ef21bd71d 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
@@ -122,7 +122,7 @@ class CompiledModel : public ov::ICompiledModel {
         std::size_t param_base = 0;
         std::vector<ov::Tensor> closure;
-        std::vector<weights::LazyTensor> transformations;
+        std::vector<weights::LazyTensor> lazy_closure;
         std::vector<ov::Tensor> scales;
         std::vector<ov::Tensor> zerops;
         std::vector<bool> is_remote;
diff --git a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
index 9498cae7f75457..26455545529a8b 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
@@ -6,38 +6,83 @@
 using ov::npuw::weights::ConcatMeta;
 using ov::npuw::weights::ConstPtr;
-using ov::npuw::weights::LTData;
 using ov::npuw::weights::LazyTensor;
+using ov::npuw::weights::LTData;
 using ov::npuw::weights::Transform;
 using ov::npuw::weights::TransformType;
 
-std::size_t LazyTensor::Hash::operator()(const LazyTensor& lt) const {
-    std::size_t seed = std::hash<void*>()(lt.m_orig_data) + 0x9e3779b9;
-    seed ^= std::hash<std::string>()(lt.m_orig_shape.to_string()) + 0x9e3779b9;
-    seed ^= std::hash<std::string>()(lt.m_orig_type.to_string()) + 0x9e3779b9;
-    for (const auto& tr : lt.m_transforms) {
-        seed ^= std::hash<int>()(static_cast<int>(tr.first)) + 0x9e3779b9;
-        if (tr.first == TransformType::PERMUTE) {
-            const auto& axes = std::get<std::vector<std::size_t>>(tr.second);
-            for (const auto& axis : axes) {
-                seed ^= std::hash<std::size_t>()(axis) + 0x9e3779b9;
-            }
-        } else if (tr.first == TransformType::CONCAT) {
-            const auto& axis = std::get<ConcatMeta>(tr.second).second;
-            seed ^= std::hash<std::size_t>()(axis) + 0x9e3779b9;
-            for (const auto& lt : std::get<ConcatMeta>(tr.second).first) {
-                seed ^= LazyTensor::Hash::operator()(lt) + 0x9e3779b9;
-            }
-        }
-    }
-    return seed;
-}
+namespace ov {
+namespace npuw {
+namespace weights {
+
+struct LazyTensorImpl {
+public:
+    explicit LazyTensorImpl() = default;
+    explicit LazyTensorImpl(const TransformType& type, const Transform& transform);
+
+    bool operator==(const LazyTensorImpl& other) const;
+
+    ov::Tensor eval() const;
+
+    ov::Tensor get_orig_tensor() const;
+
+    std::size_t get_hash() const;
+
+    bool has_transformations() const;
+
+    std::shared_ptr<LazyTensorImpl> m_parent = nullptr;
+    std::pair<TransformType, Transform> m_transform;
+    std::size_t m_hash = 0;
+
+    void* m_orig_data = nullptr;
+    ov::Shape m_orig_shape;
+    ov::element::Type m_orig_type;
+};
+
+std::size_t LazyTensorImpl::get_hash() const {
+    // Already calculated
+    if (m_hash != 0) {
+        return m_hash;
+    }
+
+    // Get parent's hash
+    std::size_t seed = 0;
+    if (m_parent) {
+        seed = m_parent->get_hash();
+    } else {
+        seed = std::hash<void*>()(m_orig_data) + 0x9e3779b9;
+        seed ^= std::hash<std::string>()(m_orig_shape.to_string()) + 0x9e3779b9;
+        seed ^= std::hash<std::string>()(m_orig_type.to_string()) + 0x9e3779b9;
+    }
+
+    // Combine with the hash of the current transform
+    seed ^= std::hash<int>()(static_cast<int>(m_transform.first)) + 0x9e3779b9;
+    if (m_transform.first == TransformType::PERMUTE) {
+        const auto& axes = std::get<std::vector<std::size_t>>(m_transform.second);
+        for (const auto& axis : axes) {
+            seed ^= std::hash<std::size_t>()(axis) + 0x9e3779b9;
+        }
+    } else if (m_transform.first == TransformType::CONCAT) {
+        const auto& axis = std::get<ConcatMeta>(m_transform.second).second;
+        seed ^= std::hash<std::size_t>()(axis) + 0x9e3779b9;
+        for (auto& lt : std::get<ConcatMeta>(m_transform.second).first) {
+            seed ^= lt.get_hash() + 0x9e3779b9;
+        }
+    }
+
+    return seed;
+}
+
+}  // namespace weights
+}  // namespace npuw
+}  // namespace ov
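`get_hash()` above chains hashes up the transformation list: a node either starts from the original constant's identity (data pointer, shape, element type) or folds in its parent's hash, then mixes in its own transform tag and arguments using the boost-style golden-ratio constant 0x9e3779b9; the result is memoized in `m_hash` when the node is built. A self-contained model of the same scheme follows, with the tensor payload reduced to plain fields; all names here are illustrative, not the plugin's.

    #include <cstddef>
    #include <functional>
    #include <memory>
    #include <vector>

    struct Node {
        std::shared_ptr<Node> parent;    // previous link in the chain
        int kind = 0;                    // stands in for TransformType
        std::vector<std::size_t> args;   // stands in for permute axes, etc.
        mutable std::size_t cached = 0;  // memoized hash, like m_hash above

        std::size_t hash() const {
            if (cached != 0) {
                return cached;  // a chain is only ever walked once
            }
            std::size_t seed = parent ? parent->hash() : 0x9e3779b9;
            seed ^= std::hash<int>()(kind) + 0x9e3779b9;
            for (auto a : args) {
                seed ^= std::hash<std::size_t>()(a) + 0x9e3779b9;
            }
            return cached = seed;
        }
    };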
-LazyTensor::LazyTensor(const TransformType& type, const Transform& transform) {
+using ov::npuw::weights::LazyTensorImpl;
+
+LazyTensorImpl::LazyTensorImpl(const TransformType& type, const Transform& transform) {
     if (type == TransformType::TENSOR && std::holds_alternative<LTData>(transform)) {
-        m_transforms.push_back({type, transform});
+        m_transform = std::make_pair(type, transform);
         ov::Tensor tensor;
-        if (std::holds_alternative<ConstPtr>(std::get<LTData>(transform))){
+        if (std::holds_alternative<ConstPtr>(std::get<LTData>(transform))) {
             tensor = ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<LTData>(transform)));
         } else {
             tensor = std::get<ov::Tensor>(std::get<LTData>(transform));
@@ -46,56 +91,67 @@ LazyTensor::LazyTensor(const TransformType& type, const Transform& transform) {
         m_orig_shape = tensor.get_shape();
         m_orig_type = tensor.get_element_type();
     } else if (type == TransformType::CONCAT && std::holds_alternative<ConcatMeta>(transform)) {
-        m_transforms.push_back({type, transform});
+        m_transform = std::make_pair(type, transform);
     } else {
         NPUW_ASSERT(false);
     }
+
+    m_hash = get_hash();
 }
 
-bool LazyTensor::operator==(const LazyTensor& other) const {
-    if (m_orig_data != other.m_orig_data || m_orig_shape != other.m_orig_shape || m_orig_type != other.m_orig_type ||
-        m_transforms.size() != other.m_transforms.size()) {
+bool LazyTensorImpl::operator==(const LazyTensorImpl& other) const {
+    if (m_hash != other.m_hash || m_orig_data != other.m_orig_data || m_orig_shape != other.m_orig_shape ||
+        m_orig_type != other.m_orig_type || m_transform.first != other.m_transform.first) {
         return false;
     }
 
-    for (size_t i = 0; i < m_transforms.size(); ++i) {
-        if (m_transforms[i].first != other.m_transforms[i].first) {
-            return false;
-        }
-        // Only PERMUTE and CONCAT have meta which needs to be compared
-        if (m_transforms[i].first == TransformType::PERMUTE) {
-            if (std::get<std::vector<std::size_t>>(m_transforms[i].second) !=
-                std::get<std::vector<std::size_t>>(other.m_transforms[i].second)) {
-                return false;
-            }
-        } else if (m_transforms[i].first == TransformType::CONCAT) {
-            const auto& m1 = std::get<ConcatMeta>(m_transforms[i].second);
-            const auto& m2 = std::get<ConcatMeta>(other.m_transforms[i].second);
-            if (m1.second != m2.second) {
-                return false;
-            }
-            if (m1.first.size() != m2.first.size()) {
-                return false;
-            }
-            for (std::size_t mi = 0; mi < m1.first.size(); ++mi) {
-                if (!(m1.first[mi] == m2.first[mi])) {
-                    return false;
-                }
-            }
-        }
-    }
-    return true;
-}
+    ConcatMeta m1, m2;
 
-void LazyTensor::update(const TransformType& type, const Transform& transform) {
-    // Sanity check
-    NPUW_ASSERT((type == TransformType::PERMUTE && std::holds_alternative<std::vector<std::size_t>>(transform)) ||
-                (type == TransformType::CONVERT && std::holds_alternative<std::monostate>(transform)));
-    m_transforms.push_back({type, transform});
+    switch (m_transform.first) {
+    case TransformType::TENSOR:
+        // everything is already compared above - skip
+        break;
+    case TransformType::CONVERT:
+        // everything is already compared above - skip
+        break;
+    case TransformType::PERMUTE:
+        if (std::get<std::vector<std::size_t>>(m_transform.second) !=
+            std::get<std::vector<std::size_t>>(other.m_transform.second)) {
+            return false;
+        }
+        break;
+    case TransformType::CONCAT:
+        m1 = std::get<ConcatMeta>(m_transform.second);
+        m2 = std::get<ConcatMeta>(other.m_transform.second);
+        if (m1.second != m2.second) {
+            return false;
+        }
+        if (m1.first.size() != m2.first.size()) {
+            return false;
+        }
+        for (std::size_t mi = 0; mi < m1.first.size(); ++mi) {
+            if (m1.first[mi] != m2.first[mi]) {
+                return false;
+            }
+        }
+        break;
+    default:
+        NPUW_ASSERT(false);
+        break;
+    }
+
+    if ((m_parent && !other.m_parent) || (!m_parent && other.m_parent)) {
+        return false;
+    }
+
+    if (m_parent && other.m_parent) {
+        return *m_parent.get() == *other.m_parent.get();
+    }
+
+    return true;
 }
 
-ov::Tensor LazyTensor::eval() const {
+ov::Tensor LazyTensorImpl::eval() const {
     /* FIXME: Consider case:
            model1: concat->permute->f16
            model2: permute->f16
        Due to the different transformation history, the same source weight
        won't be deduplicated between the two chains.
        Perhaps it should be done after model compilation and not handled here.
     */
-    ov::Tensor transformed;
-    ov::Tensor tnew;
-
-    NPUW_ASSERT(!m_transforms.empty());
-    // Process the initial tensor - either from Const or from Concat
-    if (m_transforms.front().first == TransformType::TENSOR) {
-        transformed = get_orig_tensor();
-    } else if (m_transforms.front().first == TransformType::CONCAT) {
-        std::vector<ov::Tensor> to_concat;
-        for (const auto& lt : std::get<ConcatMeta>(m_transforms.front().second).first) {
-            // Sanity check
-            NPUW_ASSERT(!lt.has_transformations());
-            to_concat.push_back(lt.get_orig_tensor());
-        }
-        transformed = ov::npuw::util::concat(to_concat, std::get<ConcatMeta>(m_transforms.front().second).second);
-    } else {
-        NPUW_ASSERT(false);
-    }
+    if (!m_parent) {
+        if (m_transform.first == TransformType::TENSOR) {
+            return get_orig_tensor();
+        } else if (m_transform.first == TransformType::CONCAT) {
+            std::vector<ov::Tensor> to_concat;
+            for (const auto& lt : std::get<ConcatMeta>(m_transform.second).first) {
+                // Sanity check
+                NPUW_ASSERT(!lt.has_transformations());
+                to_concat.push_back(lt.get_orig_tensor());
+            }
+            return ov::npuw::util::concat(to_concat, std::get<ConcatMeta>(m_transform.second).second);
+        } else {
+            NPUW_ASSERT(false);
+        }
+    }
 
-    // Process transformation on top of initial tensor
-    for (std::size_t i = 1; i < m_transforms.size(); ++i) {
-        const auto& tr = m_transforms[i];
-        switch (tr.first) {
-        case TransformType::PERMUTE:
-            tnew = ov::npuw::util::permute(transformed, std::get<std::vector<std::size_t>>(tr.second));
-            tnew.copy_to(transformed);
-        case TransformType::CONVERT:
-            tnew = ov::npuw::util::to_f16(transformed);
-            tnew.copy_to(transformed);
-        default:
-            NPUW_ASSERT(false);
-        }
-    }
-    return transformed;
+    // Process transformation
+    switch (m_transform.first) {
+    case TransformType::PERMUTE:
+        return ov::npuw::util::permute(m_parent->eval(), std::get<std::vector<std::size_t>>(m_transform.second));
+    case TransformType::CONVERT:
+        return ov::npuw::util::to_f16(m_parent->eval());
+    default:
+        NPUW_ASSERT(false);
+    }
+
+    NPUW_ASSERT(false);
+    return ov::Tensor();
 }
 
-ov::Tensor LazyTensor::get_orig_tensor() const {
+ov::Tensor LazyTensorImpl::get_orig_tensor() const {
     // Sanity check
     NPUW_ASSERT(!has_transformations());
-    if (std::holds_alternative<ConstPtr>(std::get<LTData>(m_transforms.front().second))){
-        return ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<LTData>(m_transforms.front().second)));
+    if (std::holds_alternative<ConstPtr>(std::get<LTData>(m_transform.second))) {
+        return ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<LTData>(m_transform.second)));
     }
-    return std::get<ov::Tensor>(std::get<LTData>(m_transforms.front().second));
+    return std::get<ov::Tensor>(std::get<LTData>(m_transform.second));
 }
 
-bool LazyTensor::has_transformations() const {
-    // The first transformation is always initial Tensor or Concat
-    if (m_transforms.size() == 1 && m_transforms.front().first == TransformType::TENSOR) {
+bool LazyTensorImpl::has_transformations() const {
+    if (!m_parent) {
         return false;
     }
     return true;
 }
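`eval()` recurses towards the root before applying anything, so a chain recorded as TENSOR -> PERMUTE -> CONVERT materializes in source order. Here is a condensed, runnable model of that recursion, with the tensor ops replaced by string tags so the order is observable; everything in it is illustrative, not plugin code.

    #include <iostream>
    #include <memory>
    #include <string>

    struct Chain {
        std::shared_ptr<Chain> parent;
        std::string op;  // "tensor", "permute" or "convert"

        std::string eval() const {
            if (!parent) {
                return op;  // base case: the original tensor (or concat)
            }
            return parent->eval() + "->" + op;  // parent first, then this node
        }
    };

    int main() {
        auto t = std::make_shared<Chain>(Chain{nullptr, "tensor"});
        auto p = std::make_shared<Chain>(Chain{t, "permute"});
        Chain c{p, "convert"};
        std::cout << c.eval() << '\n';  // prints: tensor->permute->convert
    }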
+
+LazyTensor::LazyTensor(const TransformType& type, const Transform& transform)
+    : m_impl(std::make_shared<LazyTensorImpl>(type, transform)) {}
+
+bool LazyTensor::operator==(const LazyTensor& other) const {
+    return *m_impl.get() == *other.m_impl.get();
+}
+
+bool LazyTensor::operator!=(const LazyTensor& other) const {
+    return !(*m_impl.get() == *other.m_impl.get());
+}
+
+void LazyTensor::update(const TransformType& type, const Transform& transform) {
+    const auto& curr = m_impl;
+    auto new_lt = std::make_shared<LazyTensorImpl>();
+
+    new_lt->m_orig_data = curr->m_orig_data;
+    new_lt->m_orig_shape = curr->m_orig_shape;
+    new_lt->m_orig_type = curr->m_orig_type;
+
+    new_lt->m_transform = std::make_pair(type, transform);
+    new_lt->m_parent = curr;
+    new_lt->m_hash = new_lt->get_hash();
+
+    m_impl = new_lt;
+}
+
+ov::Tensor LazyTensor::eval() const {
+    return m_impl->eval();
+}
+
+ov::Tensor LazyTensor::get_orig_tensor() const {
+    return m_impl->get_orig_tensor();
+}
+
+std::size_t LazyTensor::get_hash() const {
+    return m_impl->get_hash();
+}
+
+std::size_t LazyTensor::Hash::operator()(const LazyTensor& lt) const {
+    return lt.get_hash();
+}
+
+bool LazyTensor::has_transformations() const {
+    return m_impl->has_transformations();
+}
diff --git a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
index fd6ea17e8b1659..ef5abdcd7842f3 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
@@ -23,6 +23,7 @@ enum class TransformType : int { TENSOR, PERMUTE, CONVERT, CONCAT };
 
 // Forward declaration
 class LazyTensor;
+struct LazyTensorImpl;
 
 using ConcatMeta = std::pair<std::vector<LazyTensor>, std::size_t>;
 using ConstPtr = std::shared_ptr<ov::op::v0::Constant>;
@@ -42,19 +43,17 @@ class LazyTensor {
     explicit LazyTensor(const TransformType& type, const Transform& transform);
 
     bool operator==(const LazyTensor& other) const;
+    bool operator!=(const LazyTensor& other) const;
 
     void update(const TransformType& type, const Transform& transform);
 
     ov::Tensor eval() const;
     ov::Tensor get_orig_tensor() const;
-
+    std::size_t get_hash() const;
     bool has_transformations() const;
 
 private:
-    std::vector<std::pair<TransformType, Transform>> m_transforms;
-    void* m_orig_data = nullptr;
-    ov::Shape m_orig_shape;
-    ov::element::Type m_orig_type;
+    std::shared_ptr<LazyTensorImpl> m_impl = nullptr;
 };
 
 }  // namespace weights
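Note that `update()` is copy-on-write rather than in-place: it allocates a fresh `LazyTensorImpl`, points its `m_parent` at the current impl, and swaps `m_impl`, so `LazyTensor`s copied earlier keep referencing the shorter chain. A hypothetical usage fragment of the public API declared above (assumes the plugin headers; the tensor shape and axes are invented for illustration):

    using ov::npuw::weights::LazyTensor;
    using ov::npuw::weights::TransformType;

    ov::Tensor raw(ov::element::f32, ov::Shape{2, 3});
    LazyTensor weight(TransformType::TENSOR, raw);
    LazyTensor snapshot = weight;  // shares the single-node chain

    weight.update(TransformType::PERMUTE, std::vector<std::size_t>{1, 0});
    weight.update(TransformType::CONVERT, std::monostate{});

    // snapshot still evaluates to the raw tensor; weight is permuted + f16
    NPUW_ASSERT(snapshot != weight);
    ov::Tensor transformed = weight.eval();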
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
index 811f7cb5666617..29f568a75f2dfd 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
@@ -1460,8 +1460,9 @@ void Partitioner::createFunction(FunctionPipeline& func_ggg) {
                 new_param_idx++;
 
                 LOG_DEBUG("Register " << prod_output << " in the function closure");
-                funcall._transformations.push_back(LazyTensor(
-                    TransformType::TENSOR, std::dynamic_pointer_cast<ov::op::v0::Constant>(input_node)));  // (n)/1/i/c
+                funcall._lazy_closure.push_back(
+                    LazyTensor(TransformType::TENSOR,
+                               std::dynamic_pointer_cast<ov::op::v0::Constant>(input_node)));  // (n)/1/i/c
             } else if (ov::op::util::is_parameter(input_node)) {
                 LOG_DEBUG("Handling a Parameter input " << prod_output);
                 LOG_BLOCK();
@@ -1536,7 +1537,7 @@ void Partitioner::matchRepeatedSubgraphs(const std::string& func_name) {
             LOG_BLOCK();
             const auto& function = func_iter->second;
             funcall._closure.resize(function._num_params_total - function._param_offset);
-            funcall._transformations.resize(function._num_params_total - function._param_offset);
+            funcall._lazy_closure.resize(function._num_params_total - function._param_offset);
 
             auto tmp_model = *mod_iter;
             for (auto&& node_ptr : tmp_model->get_ordered_ops()) {
@@ -1557,8 +1558,9 @@ void Partitioner::matchRepeatedSubgraphs(const std::string& func_name) {
                         std::make_pair(proto_layer_name, input_desc.get_index()));  // (t)/1/b
                     LOG_DEBUG("Register " << prod_output << " in the function closure[" << param_idx
                                           << "] (via prototype " << proto_layer_name << ")");
-                    funcall._transformations[param_idx - function._param_offset] = LazyTensor(
-                        TransformType::TENSOR, std::dynamic_pointer_cast<ov::op::v0::Constant>(input_node));  // (t)/1/c
+                    funcall._lazy_closure[param_idx - function._param_offset] =
+                        LazyTensor(TransformType::TENSOR,
+                                   std::dynamic_pointer_cast<ov::op::v0::Constant>(input_node));  // (t)/1/c
                 }
             }  // for (inputs)
         }  // for(nodes)
@@ -1583,7 +1585,7 @@ void Partitioner::optimize(const std::string& func_name) {
             auto closure_idx = param_idx - f._param_offset;
             ov::parallel_for(func_group.refs.size(), [&](std::size_t f_idx) {
                 auto& funcall = func_group.refs[f_idx].get();
-                funcall._transformations[closure_idx].update(TransformType::PERMUTE, p.second);
+                funcall._lazy_closure[closure_idx].update(TransformType::PERMUTE, p.second);
             });
         }
     };
@@ -1593,7 +1595,7 @@ void Partitioner::optimize(const std::string& func_name) {
             auto closure_idx = param_idx - f._param_offset;
             ov::parallel_for(func_group.refs.size(), [&](std::size_t f_idx) {
                 auto& funcall = func_group.refs[f_idx].get();
-                funcall._transformations[closure_idx].update(TransformType::CONVERT, std::monostate{});
+                funcall._lazy_closure[closure_idx].update(TransformType::CONVERT, std::monostate{});
             });
         }
     };
@@ -1625,8 +1627,6 @@ void Partitioner::optimize(const std::string& func_name) {
     // Run parallel matmul merge
     mergeParallelMatMuls(f._model, ctx);
 
-    // Mark LazyTensors to be concatenated later
-    // Note: closures are properly processed later as well
     ov::ParameterVector new_params;
     std::vector<std::shared_ptr<ov::op::v0::Parameter>> to_remove;
     std::set<std::size_t> to_remove_idx;
@@ -1651,12 +1651,13 @@ void Partitioner::optimize(const std::string& func_name) {
             for (auto&& cidx : to_concat_idx) {
                 // FIXME: Assuming here concat goes first and other transformations later.
                 // This allows to store ov::Tensor and ignore their potential history of transformations
-                NPUW_ASSERT(!funcall._transformations[cidx].has_transformations());
-                to_concat.push_back(funcall._transformations[cidx]);
+                NPUW_ASSERT(!funcall._lazy_closure[cidx].has_transformations());
+                to_concat.push_back(funcall._lazy_closure[cidx]);
             }
-            // Note: we can ignore updating funcall._transformations[cidx] here since those LazyTensors will be gone and the new one added into the vector
+            // Note: we can ignore updating funcall._lazy_closure[cidx] here since those LazyTensors will be gone
+            // and the new one added into the vector
             if (!to_concat.empty()) {
-                funcall._transformations.push_back(LazyTensor(TransformType::CONCAT, std::make_pair(to_concat, axis)));
+                funcall._lazy_closure.push_back(LazyTensor(TransformType::CONCAT, std::make_pair(to_concat, axis)));
            }
        });
    }
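The same deferral applies to the concatenation above: the per-call weights are captured as-is, and a new chain root records only how to stitch them together; `ov::npuw::util::concat` runs when the bank eventually evaluates the tensor. A hypothetical fragment mirroring that hunk (shapes and axis invented for illustration):

    using ov::npuw::weights::LazyTensor;
    using ov::npuw::weights::TransformType;

    std::vector<LazyTensor> to_concat = {
        LazyTensor(TransformType::TENSOR, ov::Tensor(ov::element::f16, ov::Shape{16, 32})),
        LazyTensor(TransformType::TENSOR, ov::Tensor(ov::element::f16, ov::Shape{16, 32}))};
    const std::size_t axis = 0;

    // Nothing is copied yet; eval() would materialize a {32, 32} tensor.
    LazyTensor combined(TransformType::CONCAT, std::make_pair(to_concat, axis));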
@@ -1683,15 +1684,15 @@ void Partitioner::optimize(const std::string& func_name) {
             auto& funcall = func_group.refs[f_idx].get();
             // FIXME: assuming no transformations were applied to the tensor - since we are utilizing the original
             // ov::Tensor below
-            NPUW_ASSERT(!funcall._transformations[w_idx - f._param_offset].has_transformations());
+            NPUW_ASSERT(!funcall._lazy_closure[w_idx - f._param_offset].has_transformations());
             if (z_idx != -1) {
-                NPUW_ASSERT(!funcall._transformations[z_idx - f._param_offset].has_transformations());
+                NPUW_ASSERT(!funcall._lazy_closure[z_idx - f._param_offset].has_transformations());
             }
-            NPUW_ASSERT(!funcall._transformations[s_idx - f._param_offset].has_transformations());
-            ov::Tensor cw = funcall._transformations[w_idx - f._param_offset].get_orig_tensor();
+            NPUW_ASSERT(!funcall._lazy_closure[s_idx - f._param_offset].has_transformations());
+            ov::Tensor cw = funcall._lazy_closure[w_idx - f._param_offset].get_orig_tensor();
             ov::Tensor cz =
-                z_idx != -1 ? funcall._transformations[z_idx - f._param_offset].get_orig_tensor() : ov::Tensor{};
-            ov::Tensor cs = funcall._transformations[s_idx - f._param_offset].get_orig_tensor();
+                z_idx != -1 ? funcall._lazy_closure[z_idx - f._param_offset].get_orig_tensor() : ov::Tensor{};
+            ov::Tensor cs = funcall._lazy_closure[s_idx - f._param_offset].get_orig_tensor();
             ov::Tensor dst(p.first->get_element_type(), p.first->get_shape());
 
             const auto& gti = ov::get_tensor_impl;
@@ -1702,7 +1703,7 @@ void Partitioner::optimize(const std::string& func_name) {
             } else {
                 NPUW_ASSERT(false && "Unsupported combination");
             }
-            funcall._transformations.push_back(LazyTensor(TransformType::TENSOR, std::move(dst)));
+            funcall._lazy_closure.push_back(LazyTensor(TransformType::TENSOR, std::move(dst)));
         });
     }
 
@@ -1716,7 +1717,7 @@ void Partitioner::optimize(const std::string& func_name) {
         for (auto&& funcall : func_group.refs) {
             auto new_elem_type = params_to_gather.pnew->get_element_type();
             auto new_shape = params_to_gather.pnew->get_shape();
-            funcall.get()._transformations.push_back(
+            funcall.get()._lazy_closure.push_back(
                 LazyTensor(TransformType::TENSOR, ov::Tensor(new_elem_type, new_shape)));
         }
     }
@@ -1728,12 +1729,12 @@ void Partitioner::optimize(const std::string& func_name) {
         for (auto&& fref : func_group.refs) {
             auto& funcall = fref.get();
             std::vector<LazyTensor> new_transforms;
-            for (std::size_t tidx = 0; tidx < funcall._transformations.size(); tidx++) {
+            for (std::size_t tidx = 0; tidx < funcall._lazy_closure.size(); tidx++) {
                 if (to_remove_idx.count(f._param_offset + tidx) == 0) {
-                    new_transforms.push_back(funcall._transformations[tidx]);
+                    new_transforms.push_back(funcall._lazy_closure[tidx]);
                 }
             }
-            funcall._transformations = std::move(new_transforms);
+            funcall._lazy_closure = std::move(new_transforms);
         }
 
     // Remove parameters that were concatenated
     for (auto&& now_remove : to_remove) {
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
index 5599fdb409e3db..17f36115dce456 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
@@ -44,7 +44,7 @@ struct Subgraph {
     std::vector<ov::Tensor> _zerops;  // Zero points for manual unpacking
 
     // Stores transformation history for weights which will be applied before inference
-    std::vector<weights::LazyTensor> _transformations;
+    std::vector<weights::LazyTensor> _lazy_closure;
 
     struct Gather {
         // NB.: int64_t is strange but it is used by OV to refer to parameters
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp
index e6996b8428f21a..d9e808272a8d79 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp
@@ -124,30 +124,30 @@ void apply_remap(Subgraph& fcall, const ClosureRemap& m) {
     // reserve a new_scales vector to have the same size, filled with
     // empty tensors by default.
     for (auto&& i : m.closure_remap) {
-        new_transformations.push_back(fcall._transformations[i]);
+        new_transformations.push_back(fcall._lazy_closure[i]);
         auto scale_iter = m.scale_remap.find(i);
         auto zerop_iter = m.zerop_remap.find(i);
 
         // FIXME: assuming no transformations were applied to the tensor - since we are utilizing the original
         // ov::Tensor below
         if (scale_iter != m.scale_remap.end()) {
-            NPUW_ASSERT(!fcall._transformations[scale_iter->second].has_transformations());
+            NPUW_ASSERT(!fcall._lazy_closure[scale_iter->second].has_transformations());
         }
         if (zerop_iter != m.zerop_remap.end()) {
-            NPUW_ASSERT(!fcall._transformations[zerop_iter->second].has_transformations());
+            NPUW_ASSERT(!fcall._lazy_closure[zerop_iter->second].has_transformations());
         }
         new_scales.push_back(scale_iter != m.scale_remap.end()
-                                 ? fcall._transformations[scale_iter->second].get_orig_tensor()
+                                 ? fcall._lazy_closure[scale_iter->second].get_orig_tensor()
                                  : ov::Tensor());
 
         // Check for asymmetric zero points and add them to new_zerops
         const auto& zerop = zerop_iter != m.zerop_remap.end()
-                                ? fcall._transformations[zerop_iter->second].get_orig_tensor()
+                                ? fcall._lazy_closure[zerop_iter->second].get_orig_tensor()
                                 : m.zero_points[i];
         new_zerops.push_back(zerop);
     }
     fcall._scales = std::move(new_scales);
     fcall._zerops = std::move(new_zerops);
-    fcall._transformations = std::move(new_transformations);
+    fcall._lazy_closure = std::move(new_transformations);
 }
 
 void finalize_remap(Function& fbody, const ClosureRemap& m) {