From 2ae943e059a1a6c173680b6c67c419fd95a95d6f Mon Sep 17 00:00:00 2001
From: Alexey Smirnov
Date: Wed, 2 Oct 2024 18:55:04 +0000
Subject: [PATCH] Address review comments part 2

---
 .../src/plugin/npuw/compiled_model.cpp        |   6 +-
 .../src/plugin/npuw/compiled_model.hpp        |   2 +-
 .../intel_npu/src/plugin/npuw/lazy_tensor.cpp | 275 ++++++++++++------
 .../intel_npu/src/plugin/npuw/lazy_tensor.hpp |   9 +-
 .../plugin/npuw/partitioning/partitioning.cpp |  49 ++--
 .../plugin/npuw/partitioning/partitioning.hpp |   2 +-
 .../npuw/partitioning/patterns/dcoff.cpp      |  12 +-
 7 files changed, 225 insertions(+), 130 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
index 73345113e89b2d..21ad4a32bd6fb5 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -288,7 +288,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
         m_compiled_submodels[id].host_gather = subgraph._host_gather;
         m_compiled_submodels[id].param_base = fcn_template._param_offset;
         m_compiled_submodels[id].closure = subgraph._closure;
-        m_compiled_submodels[id].transformations = subgraph._transformations;
+        m_compiled_submodels[id].lazy_closure = subgraph._lazy_closure;
         m_compiled_submodels[id].scales = subgraph._scales;
         m_compiled_submodels[id].zerops = subgraph._zerops;
         m_compiled_submodels[id].is_remote.resize(subgraph._closure.size(), false);
@@ -426,8 +426,8 @@ void ov::npuw::CompiledModel::finalize_weights_bank() {
         m_compiled_submodels[idx].closure.resize(0);
         m_compiled_submodels[idx].is_remote.resize(0);
 
-        for (std::size_t tidx = 0; tidx < comp_model_desc.transformations.size(); ++tidx) {
-            const auto& lt = m_compiled_submodels[idx].transformations[tidx];
+        for (std::size_t tidx = 0; tidx < comp_model_desc.lazy_closure.size(); ++tidx) {
+            const auto& lt = m_compiled_submodels[idx].lazy_closure[tidx];
             m_compiled_submodels[idx].closure.push_back(m_weights_bank->get(lt, *func_desc.device_it));
             // FIXME: should is_remote be set unconditionally?
             m_compiled_submodels[idx].is_remote.push_back(true);
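The `m_weights_bank->get(lt, device)` call above is where the LazyTensor machinery pays off: function calls that reference the same weight with the same transformation chain should resolve to a single allocated tensor. Below is a minimal sketch of such a bank, assuming this patch's `LazyTensor` API; the `Bank` class itself, its locking, and the (unused here) device handling are illustrative assumptions, not the plugin's actual implementation.

    #include <mutex>
    #include <string>
    #include <unordered_map>

    // Sketch: evaluate each unique LazyTensor once and reuse the result for
    // every identical request. LazyTensor::Hash and operator== (defined in
    // this patch) make LazyTensor usable as an unordered_map key.
    class Bank {
    public:
        ov::Tensor get(const LazyTensor& lt, const std::string& device) {
            std::lock_guard<std::mutex> guard(m_mutex);
            auto iter = m_cache.find(lt);
            if (iter == m_cache.end()) {
                // First request: run the recorded transformation chain.
                // Allocation on `device` is omitted in this sketch.
                iter = m_cache.emplace(lt, lt.eval()).first;
            }
            return iter->second;
        }

    private:
        std::mutex m_mutex;
        std::unordered_map<LazyTensor, ov::Tensor, LazyTensor::Hash> m_cache;
    };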
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
index d565abcf4444c4..3ec92ef21bd71d 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
@@ -122,7 +122,7 @@ class CompiledModel : public ov::ICompiledModel {
         std::size_t param_base = 0;
         std::vector<ov::Tensor> closure;
-        std::vector<weights::LazyTensor> transformations;
+        std::vector<weights::LazyTensor> lazy_closure;
         std::vector<ov::Tensor> scales;
         std::vector<ov::Tensor> zerops;
         std::vector<bool> is_remote;
diff --git a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
index 9498cae7f75457..26455545529a8b 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
@@ -6,38 +6,83 @@
 using ov::npuw::weights::ConcatMeta;
 using ov::npuw::weights::ConstPtr;
-using ov::npuw::weights::LTData;
 using ov::npuw::weights::LazyTensor;
+using ov::npuw::weights::LTData;
 using ov::npuw::weights::Transform;
 using ov::npuw::weights::TransformType;
 
-std::size_t LazyTensor::Hash::operator()(const LazyTensor& lt) const {
-    std::size_t seed = std::hash<void*>()(lt.m_orig_data) + 0x9e3779b9;
-    seed ^= std::hash<std::string>()(lt.m_orig_shape.to_string()) + 0x9e3779b9;
-    seed ^= std::hash<std::string>()(lt.m_orig_type.to_string()) + 0x9e3779b9;
-    for (const auto& tr : lt.m_transforms) {
-        seed ^= std::hash<int>()(static_cast<int>(tr.first)) + 0x9e3779b9;
-        if (tr.first == TransformType::PERMUTE) {
-            const auto& axes = std::get<std::vector<std::size_t>>(tr.second);
-            for (const auto& axis : axes) {
-                seed ^= std::hash<std::size_t>()(axis) + 0x9e3779b9;
-            }
-        } else if (tr.first == TransformType::CONCAT) {
-            const auto& axis = std::get<ConcatMeta>(tr.second).second;
-            seed ^= std::hash<std::size_t>()(axis) + 0x9e3779b9;
-            for (const auto& lt : std::get<ConcatMeta>(tr.second).first) {
-                seed ^= LazyTensor::Hash::operator()(lt) + 0x9e3779b9;
-            }
-        }
-    }
-    return seed;
-}
+namespace ov {
+namespace npuw {
+namespace weights {
+
+struct LazyTensorImpl {
+public:
+    explicit LazyTensorImpl() = default;
+    explicit LazyTensorImpl(const TransformType& type, const Transform& transform);
+
+    bool operator==(const LazyTensorImpl& other) const;
+
+    ov::Tensor eval() const;
+
+    ov::Tensor get_orig_tensor() const;
+
+    std::size_t get_hash() const;
+
+    bool has_transformations() const;
+
+    std::shared_ptr<LazyTensorImpl> m_parent = nullptr;
+    std::pair<TransformType, Transform> m_transform;
+    std::size_t m_hash = 0;
+
+    void* m_orig_data = nullptr;
+    ov::Shape m_orig_shape;
+    ov::element::Type m_orig_type;
+};
+
+std::size_t LazyTensorImpl::get_hash() const {
+    // Already calculated
+    if (m_hash != 0) {
+        return m_hash;
+    }
+
+    // Get parent's hash
+    std::size_t seed = 0;
+    if (m_parent) {
+        seed = m_parent->get_hash();
+    } else {
+        seed = std::hash<void*>()(m_orig_data) + 0x9e3779b9;
+        seed ^= std::hash<std::string>()(m_orig_shape.to_string()) + 0x9e3779b9;
+        seed ^= std::hash<std::string>()(m_orig_type.to_string()) + 0x9e3779b9;
+    }
+
+    // Combine with the hash of the current transform
+    seed ^= std::hash<int>()(static_cast<int>(m_transform.first)) + 0x9e3779b9;
+    if (m_transform.first == TransformType::PERMUTE) {
+        const auto& axes = std::get<std::vector<std::size_t>>(m_transform.second);
+        for (const auto& axis : axes) {
+            seed ^= std::hash<std::size_t>()(axis) + 0x9e3779b9;
+        }
+    } else if (m_transform.first == TransformType::CONCAT) {
+        const auto& axis = std::get<ConcatMeta>(m_transform.second).second;
+        seed ^= std::hash<std::size_t>()(axis) + 0x9e3779b9;
+        for (auto& lt : std::get<ConcatMeta>(m_transform.second).first) {
+            seed ^= lt.get_hash() + 0x9e3779b9;
+        }
+    }
+
+    return seed;
+}
+
+}  // namespace weights
+}  // namespace npuw
+}  // namespace ov
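`get_hash()` above chains hashes up the transformation list: a node either starts from the original constant's identity (data pointer, shape, element type) or folds in its parent's hash, then mixes in its own transform tag and arguments using the boost-style golden-ratio constant 0x9e3779b9; the result is memoized in `m_hash` when the node is built. A self-contained model of the same scheme follows, with the tensor payload reduced to plain fields; all names here are illustrative, not the plugin's.

    #include <cstddef>
    #include <functional>
    #include <memory>
    #include <vector>

    struct Node {
        std::shared_ptr<Node> parent;    // previous link in the chain
        int kind = 0;                    // stands in for TransformType
        std::vector<std::size_t> args;   // stands in for permute axes, etc.
        mutable std::size_t cached = 0;  // memoized hash, like m_hash above

        std::size_t hash() const {
            if (cached != 0) {
                return cached;  // a chain is only ever walked once
            }
            std::size_t seed = parent ? parent->hash() : 0x9e3779b9;
            seed ^= std::hash<int>()(kind) + 0x9e3779b9;
            for (auto a : args) {
                seed ^= std::hash<std::size_t>()(a) + 0x9e3779b9;
            }
            return cached = seed;
        }
    };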
-LazyTensor::LazyTensor(const TransformType& type, const Transform& transform) {
+using ov::npuw::weights::LazyTensorImpl;
+
+LazyTensorImpl::LazyTensorImpl(const TransformType& type, const Transform& transform) {
     if (type == TransformType::TENSOR && std::holds_alternative<LTData>(transform)) {
-        m_transforms.push_back({type, transform});
+        m_transform = std::make_pair(type, transform);
         ov::Tensor tensor;
-        if (std::holds_alternative<ConstPtr>(std::get<LTData>(transform))){
+        if (std::holds_alternative<ConstPtr>(std::get<LTData>(transform))) {
             tensor = ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<LTData>(transform)));
         } else {
             tensor = std::get<ov::Tensor>(std::get<LTData>(transform));
@@ -46,56 +91,67 @@ LazyTensor::LazyTensor(const TransformType& type, const Transform& transform) {
         m_orig_shape = tensor.get_shape();
         m_orig_type = tensor.get_element_type();
     } else if (type == TransformType::CONCAT && std::holds_alternative<ConcatMeta>(transform)) {
-        m_transforms.push_back({type, transform});
+        m_transform = std::make_pair(type, transform);
     } else {
         NPUW_ASSERT(false);
     }
+
+    m_hash = get_hash();
 }
 
-bool LazyTensor::operator==(const LazyTensor& other) const {
-    if (m_orig_data != other.m_orig_data || m_orig_shape != other.m_orig_shape || m_orig_type != other.m_orig_type ||
-        m_transforms.size() != other.m_transforms.size()) {
+bool LazyTensorImpl::operator==(const LazyTensorImpl& other) const {
+    if (m_hash != other.m_hash || m_orig_data != other.m_orig_data || m_orig_shape != other.m_orig_shape ||
+        m_orig_type != other.m_orig_type || m_transform.first != other.m_transform.first) {
         return false;
     }
 
-    for (size_t i = 0; i < m_transforms.size(); ++i) {
-        if (m_transforms[i].first != other.m_transforms[i].first) {
-            return false;
-        }
-        // Only PERMUTE and CONCAT have meta which needs to be compared
-        if (m_transforms[i].first == TransformType::PERMUTE) {
-            if (std::get<std::vector<std::size_t>>(m_transforms[i].second) !=
-                std::get<std::vector<std::size_t>>(other.m_transforms[i].second)) {
-                return false;
-            }
-        } else if (m_transforms[i].first == TransformType::CONCAT) {
-            const auto& m1 = std::get<ConcatMeta>(m_transforms[i].second);
-            const auto& m2 = std::get<ConcatMeta>(other.m_transforms[i].second);
-            if (m1.second != m2.second) {
-                return false;
-            }
-            if (m1.first.size() != m2.first.size()) {
-                return false;
-            }
-            for (std::size_t mi = 0; mi < m1.first.size(); ++mi) {
-                if (!(m1.first[mi] == m2.first[mi])) {
-                    return false;
-                }
-            }
-        }
-    }
-    return true;
-}
+    ConcatMeta m1, m2;
 
-void LazyTensor::update(const TransformType& type, const Transform& transform) {
-    // Sanity check
-    NPUW_ASSERT((type == TransformType::PERMUTE && std::holds_alternative<std::vector<std::size_t>>(transform)) ||
-                (type == TransformType::CONVERT && std::holds_alternative<std::monostate>(transform)));
-    m_transforms.push_back({type, transform});
+    switch (m_transform.first) {
+    case TransformType::TENSOR:
+        // everything is already compared above - skip
+        break;
+    case TransformType::CONVERT:
+        // everything is already compared above - skip
+        break;
+    case TransformType::PERMUTE:
+        if (std::get<std::vector<std::size_t>>(m_transform.second) !=
+            std::get<std::vector<std::size_t>>(other.m_transform.second)) {
+            return false;
+        }
+        break;
+    case TransformType::CONCAT:
+        m1 = std::get<ConcatMeta>(m_transform.second);
+        m2 = std::get<ConcatMeta>(other.m_transform.second);
+        if (m1.second != m2.second) {
+            return false;
+        }
+        if (m1.first.size() != m2.first.size()) {
+            return false;
+        }
+        for (std::size_t mi = 0; mi < m1.first.size(); ++mi) {
+            if (m1.first[mi] != m2.first[mi]) {
+                return false;
+            }
+        }
+        break;
+    default:
+        NPUW_ASSERT(false);
+        break;
+    }
+
+    if ((m_parent && !other.m_parent) || (!m_parent && other.m_parent)) {
+        return false;
+    }
+
+    if (m_parent && other.m_parent) {
+        return *m_parent.get() == *other.m_parent.get();
+    }
+
+    return true;
 }
 
-ov::Tensor LazyTensor::eval() const {
+ov::Tensor LazyTensorImpl::eval() const {
     /* FIXME: Consider case:
            model1: concat->permute->f16
            model2: permute->f16
        Due to the different transformation history, the same source weight
        won't be deduplicated between the two chains.
        Perhaps it should be done after model compilation and not handled here.
     */
-    ov::Tensor transformed;
-    ov::Tensor tnew;
-
-    NPUW_ASSERT(!m_transforms.empty());
-    // Process the initial tensor - either from Const or from Concat
-    if (m_transforms.front().first == TransformType::TENSOR) {
-        transformed = get_orig_tensor();
-    } else if (m_transforms.front().first == TransformType::CONCAT) {
-        std::vector<ov::Tensor> to_concat;
-        for (const auto& lt : std::get<ConcatMeta>(m_transforms.front().second).first) {
-            // Sanity check
-            NPUW_ASSERT(!lt.has_transformations());
-            to_concat.push_back(lt.get_orig_tensor());
-        }
-        transformed = ov::npuw::util::concat(to_concat, std::get<ConcatMeta>(m_transforms.front().second).second);
-    } else {
-        NPUW_ASSERT(false);
-    }
+    if (!m_parent) {
+        if (m_transform.first == TransformType::TENSOR) {
+            return get_orig_tensor();
+        } else if (m_transform.first == TransformType::CONCAT) {
+            std::vector<ov::Tensor> to_concat;
+            for (const auto& lt : std::get<ConcatMeta>(m_transform.second).first) {
+                // Sanity check
+                NPUW_ASSERT(!lt.has_transformations());
+                to_concat.push_back(lt.get_orig_tensor());
+            }
+            return ov::npuw::util::concat(to_concat, std::get<ConcatMeta>(m_transform.second).second);
+        } else {
+            NPUW_ASSERT(false);
+        }
+    }
 
-    // Process transformation on top of initial tensor
-    for (std::size_t i = 1; i < m_transforms.size(); ++i) {
-        const auto& tr = m_transforms[i];
-        switch (tr.first) {
-        case TransformType::PERMUTE:
-            tnew = ov::npuw::util::permute(transformed, std::get<std::vector<std::size_t>>(tr.second));
-            tnew.copy_to(transformed);
-        case TransformType::CONVERT:
-            tnew = ov::npuw::util::to_f16(transformed);
-            tnew.copy_to(transformed);
-        default:
-            NPUW_ASSERT(false);
-        }
-    }
-    return transformed;
+    // Process transformation
+    switch (m_transform.first) {
+    case TransformType::PERMUTE:
+        return ov::npuw::util::permute(m_parent->eval(), std::get<std::vector<std::size_t>>(m_transform.second));
+    case TransformType::CONVERT:
+        return ov::npuw::util::to_f16(m_parent->eval());
+    default:
+        NPUW_ASSERT(false);
+    }
+
+    NPUW_ASSERT(false);
+    return ov::Tensor();
 }
 
-ov::Tensor LazyTensor::get_orig_tensor() const {
+ov::Tensor LazyTensorImpl::get_orig_tensor() const {
     // Sanity check
     NPUW_ASSERT(!has_transformations());
-    if (std::holds_alternative<ConstPtr>(std::get<LTData>(m_transforms.front().second))){
-        return ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<LTData>(m_transforms.front().second)));
+    if (std::holds_alternative<ConstPtr>(std::get<LTData>(m_transform.second))) {
+        return ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<LTData>(m_transform.second)));
     }
-    return std::get<ov::Tensor>(std::get<LTData>(m_transforms.front().second));
+    return std::get<ov::Tensor>(std::get<LTData>(m_transform.second));
 }
 
-bool LazyTensor::has_transformations() const {
-    // The first transformation is always initial Tensor or Concat
-    if (m_transforms.size() == 1 && m_transforms.front().first == TransformType::TENSOR) {
+bool LazyTensorImpl::has_transformations() const {
+    if (!m_parent) {
         return false;
     }
     return true;
 }
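`eval()` recurses towards the root before applying anything, so a chain recorded as TENSOR -> PERMUTE -> CONVERT materializes in source order. Here is a condensed, runnable model of that recursion, with the tensor ops replaced by string tags so the order is observable; everything in it is illustrative, not plugin code.

    #include <iostream>
    #include <memory>
    #include <string>

    struct Chain {
        std::shared_ptr<Chain> parent;
        std::string op;  // "tensor", "permute" or "convert"

        std::string eval() const {
            if (!parent) {
                return op;  // base case: the original tensor (or concat)
            }
            return parent->eval() + "->" + op;  // parent first, then this node
        }
    };

    int main() {
        auto t = std::make_shared<Chain>(Chain{nullptr, "tensor"});
        auto p = std::make_shared<Chain>(Chain{t, "permute"});
        Chain c{p, "convert"};
        std::cout << c.eval() << '\n';  // prints: tensor->permute->convert
    }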
+
+LazyTensor::LazyTensor(const TransformType& type, const Transform& transform)
+    : m_impl(std::make_shared<LazyTensorImpl>(type, transform)) {}
+
+bool LazyTensor::operator==(const LazyTensor& other) const {
+    return *m_impl.get() == *other.m_impl.get();
+}
+
+bool LazyTensor::operator!=(const LazyTensor& other) const {
+    return !(*m_impl.get() == *other.m_impl.get());
+}
+
+void LazyTensor::update(const TransformType& type, const Transform& transform) {
+    const auto& curr = m_impl;
+    auto new_lt = std::make_shared<LazyTensorImpl>();
+
+    new_lt->m_orig_data = curr->m_orig_data;
+    new_lt->m_orig_shape = curr->m_orig_shape;
+    new_lt->m_orig_type = curr->m_orig_type;
+
+    new_lt->m_transform = std::make_pair(type, transform);
+    new_lt->m_parent = curr;
+    new_lt->m_hash = new_lt->get_hash();
+
+    m_impl = new_lt;
+}
+
+ov::Tensor LazyTensor::eval() const {
+    return m_impl->eval();
+}
+
+ov::Tensor LazyTensor::get_orig_tensor() const {
+    return m_impl->get_orig_tensor();
+}
+
+std::size_t LazyTensor::get_hash() const {
+    return m_impl->get_hash();
+}
+
+std::size_t LazyTensor::Hash::operator()(const LazyTensor& lt) const {
+    return lt.get_hash();
+}
+
+bool LazyTensor::has_transformations() const {
+    return m_impl->has_transformations();
+}
diff --git a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
index fd6ea17e8b1659..ef5abdcd7842f3 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
@@ -23,6 +23,7 @@ enum class TransformType : int { TENSOR, PERMUTE, CONVERT, CONCAT };
 
 // Forward declaration
 class LazyTensor;
+struct LazyTensorImpl;
 
 using ConcatMeta = std::pair<std::vector<LazyTensor>, std::size_t>;
 using ConstPtr = std::shared_ptr<ov::op::v0::Constant>;
@@ -42,19 +43,17 @@ class LazyTensor {
     explicit LazyTensor(const TransformType& type, const Transform& transform);
 
     bool operator==(const LazyTensor& other) const;
+    bool operator!=(const LazyTensor& other) const;
 
     void update(const TransformType& type, const Transform& transform);
 
     ov::Tensor eval() const;
     ov::Tensor get_orig_tensor() const;
-
+    std::size_t get_hash() const;
     bool has_transformations() const;
 
 private:
-    std::vector<std::pair<TransformType, Transform>> m_transforms;
-    void* m_orig_data = nullptr;
-    ov::Shape m_orig_shape;
-    ov::element::Type m_orig_type;
+    std::shared_ptr<LazyTensorImpl> m_impl = nullptr;
 };
 
 }  // namespace weights
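Note that `update()` is copy-on-write rather than in-place: it allocates a fresh `LazyTensorImpl`, points its `m_parent` at the current impl, and swaps `m_impl`, so `LazyTensor`s copied earlier keep referencing the shorter chain. A hypothetical usage fragment of the public API declared above (assumes the plugin headers; the tensor shape and axes are invented for illustration):

    using ov::npuw::weights::LazyTensor;
    using ov::npuw::weights::TransformType;

    ov::Tensor raw(ov::element::f32, ov::Shape{2, 3});
    LazyTensor weight(TransformType::TENSOR, raw);
    LazyTensor snapshot = weight;  // shares the single-node chain

    weight.update(TransformType::PERMUTE, std::vector<std::size_t>{1, 0});
    weight.update(TransformType::CONVERT, std::monostate{});

    // snapshot still evaluates to the raw tensor; weight is permuted + f16
    NPUW_ASSERT(snapshot != weight);
    ov::Tensor transformed = weight.eval();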
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
index 811f7cb5666617..29f568a75f2dfd 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
@@ -1460,8 +1460,9 @@ void Partitioner::createFunction(FunctionPipeline& func_ggg) {
                 new_param_idx++;
 
                 LOG_DEBUG("Register " << prod_output << " in the function closure");
-                funcall._transformations.push_back(LazyTensor(
-                    TransformType::TENSOR, std::dynamic_pointer_cast<ov::op::v0::Constant>(input_node)));  // (n)/1/i/c
+                funcall._lazy_closure.push_back(
+                    LazyTensor(TransformType::TENSOR,
+                               std::dynamic_pointer_cast<ov::op::v0::Constant>(input_node)));  // (n)/1/i/c
             } else if (ov::op::util::is_parameter(input_node)) {
                 LOG_DEBUG("Handling a Parameter input " << prod_output);
                 LOG_BLOCK();
@@ -1536,7 +1537,7 @@ void Partitioner::matchRepeatedSubgraphs(const std::string& func_name) {
             LOG_BLOCK();
             const auto& function = func_iter->second;
             funcall._closure.resize(function._num_params_total - function._param_offset);
-            funcall._transformations.resize(function._num_params_total - function._param_offset);
+            funcall._lazy_closure.resize(function._num_params_total - function._param_offset);
 
             auto tmp_model = *mod_iter;
             for (auto&& node_ptr : tmp_model->get_ordered_ops()) {
@@ -1557,8 +1558,9 @@ void Partitioner::matchRepeatedSubgraphs(const std::string& func_name) {
                         std::make_pair(proto_layer_name, input_desc.get_index()));  // (t)/1/b
                     LOG_DEBUG("Register " << prod_output << " in the function closure[" << param_idx
                                           << "] (via prototype " << proto_layer_name << ")");
-                    funcall._transformations[param_idx - function._param_offset] = LazyTensor(
-                        TransformType::TENSOR, std::dynamic_pointer_cast<ov::op::v0::Constant>(input_node));  // (t)/1/c
+                    funcall._lazy_closure[param_idx - function._param_offset] =
+                        LazyTensor(TransformType::TENSOR,
+                                   std::dynamic_pointer_cast<ov::op::v0::Constant>(input_node));  // (t)/1/c
                 }
             }  // for (inputs)
         }  // for(nodes)
@@ -1583,7 +1585,7 @@ void Partitioner::optimize(const std::string& func_name) {
             auto closure_idx = param_idx - f._param_offset;
             ov::parallel_for(func_group.refs.size(), [&](std::size_t f_idx) {
                 auto& funcall = func_group.refs[f_idx].get();
-                funcall._transformations[closure_idx].update(TransformType::PERMUTE, p.second);
+                funcall._lazy_closure[closure_idx].update(TransformType::PERMUTE, p.second);
             });
         }
     };
@@ -1593,7 +1595,7 @@ void Partitioner::optimize(const std::string& func_name) {
             auto closure_idx = param_idx - f._param_offset;
             ov::parallel_for(func_group.refs.size(), [&](std::size_t f_idx) {
                 auto& funcall = func_group.refs[f_idx].get();
-                funcall._transformations[closure_idx].update(TransformType::CONVERT, std::monostate{});
+                funcall._lazy_closure[closure_idx].update(TransformType::CONVERT, std::monostate{});
             });
         }
     };
@@ -1625,8 +1627,6 @@ void Partitioner::optimize(const std::string& func_name) {
     // Run parallel matmul merge
     mergeParallelMatMuls(f._model, ctx);
 
-    // Mark LazyTensors to be concatenated later
-    // Note: closures are properly processed later as well
     ov::ParameterVector new_params;
     std::vector<std::shared_ptr<ov::op::v0::Parameter>> to_remove;
     std::set<std::size_t> to_remove_idx;
@@ -1651,12 +1651,13 @@ void Partitioner::optimize(const std::string& func_name) {
             for (auto&& cidx : to_concat_idx) {
                 // FIXME: Assuming here concat goes first and other transformations later.
                 // This allows to store ov::Tensor and ignore their potential history of transformations
-                NPUW_ASSERT(!funcall._transformations[cidx].has_transformations());
-                to_concat.push_back(funcall._transformations[cidx]);
+                NPUW_ASSERT(!funcall._lazy_closure[cidx].has_transformations());
+                to_concat.push_back(funcall._lazy_closure[cidx]);
             }
-            // Note: we can ignore updating funcall._transformations[cidx] here since those LazyTensors will be gone and the new one added into the vector
+            // Note: we can ignore updating funcall._lazy_closure[cidx] here since those LazyTensors will be gone
+            // and the new one added into the vector
             if (!to_concat.empty()) {
-                funcall._transformations.push_back(LazyTensor(TransformType::CONCAT, std::make_pair(to_concat, axis)));
+                funcall._lazy_closure.push_back(LazyTensor(TransformType::CONCAT, std::make_pair(to_concat, axis)));
            }
        });
    }
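The same deferral applies to the concatenation above: the per-call weights are captured as-is, and a new chain root records only how to stitch them together; `ov::npuw::util::concat` runs when the bank eventually evaluates the tensor. A hypothetical fragment mirroring that hunk (shapes and axis invented for illustration):

    using ov::npuw::weights::LazyTensor;
    using ov::npuw::weights::TransformType;

    std::vector<LazyTensor> to_concat = {
        LazyTensor(TransformType::TENSOR, ov::Tensor(ov::element::f16, ov::Shape{16, 32})),
        LazyTensor(TransformType::TENSOR, ov::Tensor(ov::element::f16, ov::Shape{16, 32}))};
    const std::size_t axis = 0;

    // Nothing is copied yet; eval() would materialize a {32, 32} tensor.
    LazyTensor combined(TransformType::CONCAT, std::make_pair(to_concat, axis));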
@@ -1683,15 +1684,15 @@ void Partitioner::optimize(const std::string& func_name) {
             auto& funcall = func_group.refs[f_idx].get();
             // FIXME: assuming no transformations were applied to the tensor - since we are utilizing the original
             // ov::Tensor below
-            NPUW_ASSERT(!funcall._transformations[w_idx - f._param_offset].has_transformations());
+            NPUW_ASSERT(!funcall._lazy_closure[w_idx - f._param_offset].has_transformations());
             if (z_idx != -1) {
-                NPUW_ASSERT(!funcall._transformations[z_idx - f._param_offset].has_transformations());
+                NPUW_ASSERT(!funcall._lazy_closure[z_idx - f._param_offset].has_transformations());
             }
-            NPUW_ASSERT(!funcall._transformations[s_idx - f._param_offset].has_transformations());
-            ov::Tensor cw = funcall._transformations[w_idx - f._param_offset].get_orig_tensor();
+            NPUW_ASSERT(!funcall._lazy_closure[s_idx - f._param_offset].has_transformations());
+            ov::Tensor cw = funcall._lazy_closure[w_idx - f._param_offset].get_orig_tensor();
             ov::Tensor cz =
-                z_idx != -1 ? funcall._transformations[z_idx - f._param_offset].get_orig_tensor() : ov::Tensor{};
-            ov::Tensor cs = funcall._transformations[s_idx - f._param_offset].get_orig_tensor();
+                z_idx != -1 ? funcall._lazy_closure[z_idx - f._param_offset].get_orig_tensor() : ov::Tensor{};
+            ov::Tensor cs = funcall._lazy_closure[s_idx - f._param_offset].get_orig_tensor();
             ov::Tensor dst(p.first->get_element_type(), p.first->get_shape());
 
             const auto& gti = ov::get_tensor_impl;
@@ -1702,7 +1703,7 @@ void Partitioner::optimize(const std::string& func_name) {
             } else {
                 NPUW_ASSERT(false && "Unsupported combination");
             }
-            funcall._transformations.push_back(LazyTensor(TransformType::TENSOR, std::move(dst)));
+            funcall._lazy_closure.push_back(LazyTensor(TransformType::TENSOR, std::move(dst)));
         });
     }
 
@@ -1716,7 +1717,7 @@ void Partitioner::optimize(const std::string& func_name) {
         for (auto&& funcall : func_group.refs) {
             auto new_elem_type = params_to_gather.pnew->get_element_type();
             auto new_shape = params_to_gather.pnew->get_shape();
-            funcall.get()._transformations.push_back(
+            funcall.get()._lazy_closure.push_back(
                 LazyTensor(TransformType::TENSOR, ov::Tensor(new_elem_type, new_shape)));
         }
     }
@@ -1728,12 +1729,12 @@ void Partitioner::optimize(const std::string& func_name) {
         for (auto&& fref : func_group.refs) {
             auto& funcall = fref.get();
             std::vector<LazyTensor> new_transforms;
-            for (std::size_t tidx = 0; tidx < funcall._transformations.size(); tidx++) {
+            for (std::size_t tidx = 0; tidx < funcall._lazy_closure.size(); tidx++) {
                 if (to_remove_idx.count(f._param_offset + tidx) == 0) {
-                    new_transforms.push_back(funcall._transformations[tidx]);
+                    new_transforms.push_back(funcall._lazy_closure[tidx]);
                 }
             }
-            funcall._transformations = std::move(new_transforms);
+            funcall._lazy_closure = std::move(new_transforms);
         }
 
     // Remove parameters that were concatenated
     for (auto&& now_remove : to_remove) {
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
index 5599fdb409e3db..17f36115dce456 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp
@@ -44,7 +44,7 @@ struct Subgraph {
     std::vector<ov::Tensor> _zerops;  // Zero points for manual unpacking
 
     // Stores transformation history for weights which will be applied before inference
-    std::vector<weights::LazyTensor> _transformations;
+    std::vector<weights::LazyTensor> _lazy_closure;
 
     struct Gather {
         // NB.: int64_t is strange but it is used by OV to refer to parameters
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp
index e6996b8428f21a..d9e808272a8d79 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp
@@ -124,30 +124,30 @@ void apply_remap(Subgraph& fcall, const ClosureRemap& m) {
     // reserve a new_scales vector to have the same size, filled with
     // empty tensors by default.
     for (auto&& i : m.closure_remap) {
-        new_transformations.push_back(fcall._transformations[i]);
+        new_transformations.push_back(fcall._lazy_closure[i]);
         auto scale_iter = m.scale_remap.find(i);
         auto zerop_iter = m.zerop_remap.find(i);
 
         // FIXME: assuming no transformations were applied to the tensor - since we are utilizing the original
         // ov::Tensor below
         if (scale_iter != m.scale_remap.end()) {
-            NPUW_ASSERT(!fcall._transformations[scale_iter->second].has_transformations());
+            NPUW_ASSERT(!fcall._lazy_closure[scale_iter->second].has_transformations());
         }
         if (zerop_iter != m.zerop_remap.end()) {
-            NPUW_ASSERT(!fcall._transformations[zerop_iter->second].has_transformations());
+            NPUW_ASSERT(!fcall._lazy_closure[zerop_iter->second].has_transformations());
         }
         new_scales.push_back(scale_iter != m.scale_remap.end()
-                                 ? fcall._transformations[scale_iter->second].get_orig_tensor()
+                                 ? fcall._lazy_closure[scale_iter->second].get_orig_tensor()
                                  : ov::Tensor());
 
         // Check for asymmetric zero points and add them to new_zerops
         const auto& zerop = zerop_iter != m.zerop_remap.end()
-                                ? fcall._transformations[zerop_iter->second].get_orig_tensor()
+                                ? fcall._lazy_closure[zerop_iter->second].get_orig_tensor()
                                 : m.zero_points[i];
         new_zerops.push_back(zerop);
     }
     fcall._scales = std::move(new_scales);
     fcall._zerops = std::move(new_zerops);
-    fcall._transformations = std::move(new_transformations);
+    fcall._lazy_closure = std::move(new_transformations);
 }
 
 void finalize_remap(Function& fbody, const ClosureRemap& m) {