Skip to content

Commit

Permalink
Speed up applying transformations
Browse files Browse the repository at this point in the history
  • Loading branch information
smirnov-alexey committed Oct 3, 2024
1 parent d82f570 commit f47c9f1
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 5 deletions.
27 changes: 25 additions & 2 deletions src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,10 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
}

void ov::npuw::CompiledModel::finalize_weights_bank() {
// Evaluate lazy tensors which aren't in the bank beforehand
std::vector<std::vector<ov::Tensor>> evaluated_tensors(m_compiled_submodels.size());
ov::parallel_for(m_compiled_submodels.size(), [&](std::size_t idx) {
evaluated_tensors[idx].resize(m_compiled_submodels[idx].lazy_closure.size());
auto& comp_model_desc = m_compiled_submodels[idx];

if (!comp_model_desc.replaced_by) {
Expand All @@ -423,17 +426,37 @@ void ov::npuw::CompiledModel::finalize_weights_bank() {
const auto real_idx = comp_model_desc.replaced_by.value();
auto& func_desc = m_compiled_submodels[real_idx];

for (std::size_t tidx = 0; tidx < comp_model_desc.lazy_closure.size(); ++tidx) {
const auto& lt = m_compiled_submodels[idx].lazy_closure[tidx];
if (m_weights_bank->has(lt, *func_desc.device_it)) {
continue;
}
evaluated_tensors[idx][tidx] = lt.eval();
}
});

for (size_t idx = 0; idx < m_compiled_submodels.size(); ++idx) {
auto& comp_model_desc = m_compiled_submodels[idx];

if (!comp_model_desc.replaced_by) {
continue;
}

const auto real_idx = comp_model_desc.replaced_by.value();
auto& func_desc = m_compiled_submodels[real_idx];

        // Due to concat, some tensors should be skipped in the closure
m_compiled_submodels[idx].closure.resize(0);
m_compiled_submodels[idx].is_remote.resize(0);

for (std::size_t tidx = 0; tidx < comp_model_desc.lazy_closure.size(); ++tidx) {
const auto& lt = m_compiled_submodels[idx].lazy_closure[tidx];
m_compiled_submodels[idx].closure.push_back(m_weights_bank->get(lt, *func_desc.device_it));
const auto& evaled = evaluated_tensors[idx][tidx];
m_compiled_submodels[idx].closure.push_back(m_weights_bank->get(lt, *func_desc.device_it, evaled));
// FIXME: should is_remote be set unconditionally?
m_compiled_submodels[idx].is_remote.push_back(true);
}
});
}
}

void ov::npuw::CompiledModel::remove_long_output_names(const std::shared_ptr<ov::Model>& model) {
Expand Down
15 changes: 13 additions & 2 deletions src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class BankManager {
std::mutex m_mutex;
};

ov::Tensor Bank::get(const LazyTensor& tensor, const std::string& device) {
ov::Tensor Bank::get(const LazyTensor& tensor, const std::string& device, const ov::Tensor& evaled) {
if (device != "CPU" && device != "NPU") {
OPENVINO_THROW("Unsupported device in weights bank allocation: ", device);
}
Expand All @@ -45,7 +45,7 @@ ov::Tensor Bank::get(const LazyTensor& tensor, const std::string& device) {
auto& device_bank = m_device_bank[device];
auto iter_device = device_bank.find(tensor);
if (iter_device == device_bank.end()) {
ov::Tensor transformed_tensor = tensor.eval();
ov::Tensor transformed_tensor = evaled ? evaled : tensor.eval();

if (device == "CPU" || m_alloc_device != device) {
// No allocation - store as is
Expand All @@ -66,6 +66,17 @@ ov::Tensor Bank::get(const LazyTensor& tensor, const std::string& device) {
return iter_device->second;
}

bool Bank::has(const LazyTensor& tensor, const std::string& device) {
if (device != "CPU" && device != "NPU") {
OPENVINO_THROW("Unsupported device in weights bank allocation: ", device);
}

std::lock_guard<std::mutex> guard(m_mutex);

const auto& device_bank = m_device_bank[device];
return device_bank.find(tensor) != device_bank.end();
}

std::shared_ptr<Bank> BankManager::getBank(const std::string& bank_name,
const std::shared_ptr<const ov::ICore>& core,
const std::string& alloc_device) {
Expand Down
3 changes: 2 additions & 1 deletion src/plugins/intel_npu/src/plugin/npuw/weights_bank.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class Bank {
m_alloc_device(alloc_device) {}

// Based on previously captured lazy tensor allocate a new tensor (if needed) on a specified device
ov::Tensor get(const LazyTensor& tensor, const std::string& device);
ov::Tensor get(const LazyTensor& tensor, const std::string& device, const ov::Tensor& evaled = ov::Tensor());
bool has(const LazyTensor& tensor, const std::string& device);

private:
// Bank for specified device and their allocated memory
Expand Down

0 comments on commit f47c9f1

Please sign in to comment.