Merge branch 'master' into as/npuw_online_part_tests
dmatveev authored Oct 2, 2024
2 parents bf2b8d8 + cddcfe8 commit 582fca4
Showing 3 changed files with 51 additions and 16 deletions.
1 change: 1 addition & 0 deletions src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -178,6 +178,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
}
auto process_params = [&](const ov::ParameterVector& _parameters) {
for (size_t i = 0; i < _parameters.size(); i++) {
+ NPUW_ASSERT(_parameters[i]);
LOG_VERB(_parameters[i]);
for (size_t j = 0; j < orig_parameters.size(); j++) {
if (_parameters[i] == orig_parameters[j]) {
Expand Down
58 changes: 45 additions & 13 deletions src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
@@ -4,6 +4,8 @@

#include "partitioning.hpp"

+ #include <memory>

#include "../logging.hpp"
#include "../util.hpp"
#include "intel_npu/al/config/npuw.hpp"
@@ -20,6 +22,26 @@
#include "patterns/dcoff.hpp"
#include "patterns/opt.hpp"

+ namespace ov {
+ namespace npuw {
+ inline bool operator==(const std::reference_wrapper<Subgraph>& lhs, const std::reference_wrapper<Subgraph>& rhs) {
+ ov::npuw::Subgraph& llink = lhs.get();
+ ov::npuw::Subgraph& rlink = rhs.get();
+ return &llink == &rlink;
+ }
+ } // namespace npuw
+ } // namespace ov
+
+ template <typename T2>
+ struct std::hash<std::pair<ov::npuw::Subgraph::Ref, T2>> {
+ std::size_t operator()(std::pair<ov::npuw::Subgraph::Ref, T2> const& p) const noexcept {
+ ov::npuw::Subgraph& sg = p.first.get();
+ std::size_t h1 = std::hash<void*>{}(&sg);
+ std::size_t h2 = std::hash<T2>{}(p.second);
+ return h1 ^ (h2 << 1);
+ }
+ };

namespace {

class FuncallEverywhere {
@@ -161,6 +183,8 @@ class Partitioner {

using PPtr = std::shared_ptr<ov::op::v0::Parameter>;
using RPtr = std::shared_ptr<ov::op::v0::Result>;
+ using SubgParam = std::pair<ov::npuw::Subgraph::Ref, PPtr>;
+ using SubgResult = std::pair<ov::npuw::Subgraph::Ref, RPtr>;
using LinkPtrTo = std::pair<size_t /*submodel_idx*/
,
PPtr /*param ptr*/
@@ -182,8 +206,8 @@

// Map every function call instance' Parameter and result
// back to its prototype Parameter and Result
- std::unordered_map<PPtr, PPtr> param_call_to_proto;
- std::unordered_map<RPtr, RPtr> result_call_to_proto;
+ std::unordered_map<SubgParam, PPtr> param_call_to_proto;
+ std::unordered_map<SubgResult, RPtr> result_call_to_proto;
};
std::map<std::string, FunctionPipeline> all_functions;

@@ -203,7 +227,10 @@
void createFunction(FunctionPipeline& func_ggg);

template <typename T, typename M>
- void rearrange_to_function_protocol(const std::vector<T>& protocol, std::vector<T>& call, const M& call_to_proto) {
+ void rearrange_to_function_protocol(ov::npuw::Subgraph::Ref func_ref,
+ const std::vector<T>& protocol,
+ std::vector<T>& call,
+ const M& call_to_proto) {
LOG_DEBUG("Rearranging...");
LOG_BLOCK();
LOG_DEBUG("Protocol: " << protocol.size());
@@ -215,7 +242,7 @@
LOG_DEBUG("Call: " << call.size());
for (auto&& c : call) {
LOG_BLOCK();
- auto p_c = call_to_proto.at(c);
+ auto p_c = call_to_proto.at(typename M::key_type(func_ref, c));
to_proto.push_back(p_c);
LOG_DEBUG(c << " (which is " << p_c << ")");
}
@@ -536,7 +563,7 @@ void Partitioner::identifySubgraphs() {
LOG_VERB("Processing group's output layer " << output_layer_name);
LOG_BLOCK();
auto output_layer_ptr = node_id_cache.at(output_layer_name);
- if (output_layer_ptr->inputs().empty()) {
+ if (output_layer_ptr->outputs().empty()) {
OPENVINO_THROW("The group's output layer ",
output_layer_name,
" has NO OUTPUTS!! - Graph contracts are broken??");
@@ -1327,9 +1354,12 @@ void Partitioner::matchParameters(const std::string& func_name) {

// Now walk other submodels and match parameters with the same key
// (yes, including the first one)
- for (auto&& call : model_group) {
+ for (std::size_t call_id = 0; call_id < model_group.size(); ++call_id) {
LOG_DEBUG("Handle function call...");
LOG_BLOCK();
+ auto call = model_group[call_id];
+ auto subg_ref = func.refs[call_id];

std::unordered_set<ov::Node*> this_model_nodes;
for (auto&& node_ptr : call->get_ordered_ops()) {
this_model_nodes.insert(node_ptr.get());
@@ -1348,7 +1378,7 @@
LOG_DEBUG("Find orig parameter for " << node);
auto& orig_param = proto_parameters.at(pkey);
auto this_param = std::dynamic_pointer_cast<PPtr::element_type>(node);
- func.param_call_to_proto[this_param] = orig_param;
+ func.param_call_to_proto[SubgParam(subg_ref, this_param)] = orig_param;
}
}
}
@@ -1386,14 +1416,16 @@ void Partitioner::matchResults(const std::string& func_name) {

// Now walk all submodels and match parameters with the same key
// (yes, including the first one)
- for (auto&& call : model_group) {
+ for (std::size_t call_idx = 0; call_idx < model_group.size(); ++call_idx) {
+ auto call = model_group[call_idx];
+ auto subg_ref = func.refs[call_idx];
for (auto&& node : call->get_ordered_ops()) {
if (ov::op::util::is_output(node)) {
auto&& port = node->input(0).get_source_output();
RKey rkey = {layer_to_prototype.at(port.get_node()->get_friendly_name()), port.get_index()};
auto& orig_result = proto_results.at(rkey);
auto this_result = std::dynamic_pointer_cast<RPtr::element_type>(node);
- func.result_call_to_proto[this_result] = orig_result;
+ func.result_call_to_proto[SubgResult(subg_ref, this_result)] = orig_result;
}
}
}
@@ -1517,8 +1549,8 @@ void Partitioner::matchRepeatedSubgraphs(const std::string& func_name) {
funcall._gflops = this_sg._gflops; // duplicated code again!
funcall._ops = this_sg._ops; // duplicated code again!
funcall._avoid_list = this_sg._avoid_list; // duplicated code again!
- rearrange_to_function_protocol(body_params, funcall._parameters, func_ggg.param_call_to_proto);
- rearrange_to_function_protocol(body_results, funcall._results, func_ggg.result_call_to_proto);
+ rearrange_to_function_protocol(this_sg, body_params, funcall._parameters, func_ggg.param_call_to_proto);
+ rearrange_to_function_protocol(this_sg, body_results, funcall._results, func_ggg.result_call_to_proto);

auto func_iter = P.functions.find(func_name);
NPUW_ASSERT(func_iter != P.functions.end());
@@ -1883,7 +1915,7 @@ void Partitioner::finalizeLinks() {
auto& params = P.functions.at(sg_desc._funcall)._model->get_parameters();
auto& proto = func_pipeline_type == FunctionPipelineType::CWAI
? ptr // no protos in the CWAI case..
- : all_functions.at(sg_desc._funcall).param_call_to_proto.at(ptr);
+ : all_functions.at(sg_desc._funcall).param_call_to_proto.at(SubgParam(sg_desc, ptr));
auto param_iter = std::find(params.begin(), params.end(), proto);
NPUW_ASSERT(param_iter != params.end());
return std::distance(params.begin(), param_iter);
@@ -1904,7 +1936,7 @@
auto& results = P.functions.at(sg_desc._funcall)._model->get_results();
auto& proto = func_pipeline_type == FunctionPipelineType::CWAI
? ptr // no protos in the CWAI case...
- : all_functions.at(sg_desc._funcall).result_call_to_proto.at(ptr);
+ : all_functions.at(sg_desc._funcall).result_call_to_proto.at(SubgResult(sg_desc, ptr));
auto result_iter = std::find(results.begin(), results.end(), proto);
NPUW_ASSERT(result_iter != results.end());
return std::distance(results.begin(), result_iter);
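Note on the partitioning.cpp changes above: the call-to-prototype maps for Parameters and Results are now keyed by a (subgraph reference, node pointer) pair (SubgParam / SubgResult) rather than by the node pointer alone, so the same node can map to a different prototype in each function-call subgraph; the address-based operator== for std::reference_wrapper<Subgraph> and the std::hash specialization exist only to make such pairs usable as std::unordered_map keys. Below is a minimal, self-contained sketch of that idea; Subgraph, Param, KeyHash and KeyEq are simplified stand-ins for illustration, not the real ov::npuw types and not part of this commit.

// Illustrative sketch (not part of this commit): how (subgraph reference, node pointer)
// pairs can key an unordered_map once hashing and equality are defined by object identity.
#include <functional>
#include <iostream>
#include <memory>
#include <unordered_map>
#include <utility>

struct Subgraph {};                      // stand-in for ov::npuw::Subgraph
using Param = std::shared_ptr<int>;      // stand-in for a Parameter pointer
using SubgRef = std::reference_wrapper<Subgraph>;
using Key = std::pair<SubgRef, Param>;

struct KeyHash {
    std::size_t operator()(const Key& k) const noexcept {
        std::size_t h1 = std::hash<void*>{}(&k.first.get());  // hash the subgraph's address
        std::size_t h2 = std::hash<Param>{}(k.second);         // hash the node pointer
        return h1 ^ (h2 << 1);                                 // same combining scheme as the commit
    }
};

struct KeyEq {
    bool operator()(const Key& a, const Key& b) const noexcept {
        // Keys match when they refer to the same subgraph object and hold the same node pointer.
        return &a.first.get() == &b.first.get() && a.second == b.second;
    }
};

int main() {
    Subgraph call0, call1;                // two function-call instances
    Param p = std::make_shared<int>(42);  // the same node pointer appears in both

    std::unordered_map<Key, const char*, KeyHash, KeyEq> call_to_proto;
    call_to_proto[{std::ref(call0), p}] = "prototype for call 0";
    call_to_proto[{std::ref(call1), p}] = "prototype for call 1";

    // The same pointer resolves to a different prototype per subgraph instance.
    std::cout << call_to_proto.at({std::ref(call0), p}) << "\n"
              << call_to_proto.at({std::ref(call1), p}) << "\n";
}

Keying by identity rather than by content is what lets two calls that reuse the same node pointer keep separate prototype mappings, which is what the updated matchParameters()/matchResults() store per call.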
8 changes: 5 additions & 3 deletions src/plugins/intel_npu/tools/single-image-test/main.cpp
@@ -1200,7 +1200,8 @@ bool computeRRMSE(const ov::Tensor& output, const ov::Tensor& reference) {

double rrmseLoss = sqrt(error / sum);

std::cout << "RRMSE loss : " << rrmseLoss << " RRMSE threshold : " << FLAGS_rrmse_loss_threshold << std::endl;
std::cout << "RRMSE loss : " << std::fixed << std::setprecision(4) << rrmseLoss
<< " RRMSE threshold : " << FLAGS_rrmse_loss_threshold << std::endl;
return rrmseLoss <= FLAGS_rrmse_loss_threshold;
}

@@ -1267,7 +1268,8 @@ bool computeNRMSE(const ov::Tensor& output, const ov::Tensor& reference) {
double nrmseLoss =
sqrt(error / size) / std::max(0.001f, std::max(maxOutput - minOutput, maxReference - minReference));

std::cout << "NRMSE loss : " << nrmseLoss << " NRMSE threshold : " << FLAGS_nrmse_loss_threshold << std::endl;
std::cout << "NRMSE loss : " << std::fixed << std::setprecision(4) << nrmseLoss
<< " NRMSE threshold : " << FLAGS_nrmse_loss_threshold << std::endl;
return nrmseLoss <= FLAGS_nrmse_loss_threshold;
}

@@ -1319,7 +1321,7 @@ bool testPSNR(const TensorMap& outputs, const TensorMap& references, const int d

auto result = utils::runPSNRMetric(actOutput, refOutput, dstHeight, dstWidth, scaleBorder, normalizedImage);

- if (std::fabs(result - FLAGS_psnr_reference) > FLAGS_psnr_tolerance) {
+ if (FLAGS_psnr_reference - result > FLAGS_psnr_tolerance) {
std::cout << "Absolute difference between actual value " << result << " and reference value "
<< FLAGS_psnr_reference << " larger then tolerance " << FLAGS_psnr_tolerance << std::endl;
return false;
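The testPSNR change above turns a two-sided tolerance into a one-sided one: a measured PSNR that exceeds FLAGS_psnr_reference by more than FLAGS_psnr_tolerance no longer fails the test; only a result that falls below the reference by more than the tolerance does. A small sketch with made-up values, for illustration only (not part of the commit):

#include <cmath>
#include <iostream>

int main() {
    const double reference = 30.0;  // stands in for FLAGS_psnr_reference
    const double tolerance = 2.0;   // stands in for FLAGS_psnr_tolerance
    const double result = 35.0;     // measured PSNR, better than the reference

    const bool old_fails = std::fabs(result - reference) > tolerance;  // true: rejected for being "too good"
    const bool new_fails = (reference - result) > tolerance;           // false: accepted

    std::cout << std::boolalpha << "old check fails: " << old_fails << "\n"
              << "new check fails: " << new_fails << "\n";
}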
