diff --git a/src/common/snippets/include/snippets/pass/fc_tokenization.hpp b/src/common/snippets/include/snippets/pass/fc_tokenization.hpp
new file mode 100644
index 00000000000000..40505607341ba4
--- /dev/null
+++ b/src/common/snippets/include/snippets/pass/fc_tokenization.hpp
@@ -0,0 +1,27 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/matcher_pass.hpp"
+#include "snippets/pass/tokenization.hpp"
+
+namespace ov {
+namespace snippets {
+namespace pass {
+
+/**
+ * @interface TokenizeFCSnippets
+ * @brief The pass tokenizes FullyConnected-like MatMuls (MatMuls with a constant path on the B input)
+ * @ingroup snippets
+ */
+class TokenizeFCSnippets: public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("TokenizeFCSnippets", "0");
+    TokenizeFCSnippets(const SnippetsTokenization::Config& config);
+};
+
+} // namespace pass
+} // namespace snippets
+} // namespace ov
diff --git a/src/common/snippets/include/snippets/utils/tokenization_utils.hpp b/src/common/snippets/include/snippets/utils/tokenization_utils.hpp
new file mode 100644
index 00000000000000..6612560cb2b0e9
--- /dev/null
+++ b/src/common/snippets/include/snippets/utils/tokenization_utils.hpp
@@ -0,0 +1,28 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief This file contains tokenization-related utilities.
+ * @file tokenization_utils.hpp
+ */
+#pragma once
+
+#include "snippets/op/subgraph.hpp"
+#include "snippets/pass/tokenization.hpp"
+
+namespace ov {
+namespace snippets {
+namespace utils {
+/**
+ * @brief Tokenizes a node into a Subgraph. Two options are possible (depending on the config values and internal logic):
+ * 1. The node is wrapped in a trivial Subgraph which contains only this node
+ * 2. 
The node is fused in parent's Subgraphs + * @param node node which should be tokenized + * @param config tokenization config which regulates + * @return whether the node was tokenized or not + */ +bool tokenize_node(const std::shared_ptr& node, const ov::snippets::pass::SnippetsTokenization::Config& config); +} // namespace utils +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/pass/collapse_subgraph.cpp b/src/common/snippets/src/pass/collapse_subgraph.cpp index 7d55129b3241a8..0f0cc225173479 100644 --- a/src/common/snippets/src/pass/collapse_subgraph.cpp +++ b/src/common/snippets/src/pass/collapse_subgraph.cpp @@ -22,6 +22,7 @@ #include "snippets/pass/transpose_decomposition.hpp" #include "snippets/remarks.hpp" #include "snippets/utils/utils.hpp" +#include "snippets/utils/tokenization_utils.hpp" #include "transformations/utils/utils.hpp" namespace ov { @@ -30,19 +31,6 @@ namespace pass { namespace { - -auto outputs_are_not_broadcastable(const std::shared_ptr& node) -> bool { - const auto& outputs = node->outputs(); - if (outputs.size() <= 1) - return false; - ov::PartialShape ref_shape = outputs.front().get_partial_shape(); - bool success = true; - for (size_t i = 1; i < outputs.size() && success; i++) { - success &= ov::PartialShape::broadcast_merge_into(ref_shape, outputs[i].get_partial_shape(), ov::op::AutoBroadcastType::NUMPY); - } - return !success; -} - auto is_supported_op(const std::shared_ptr &n) -> bool { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::is_supported_op") auto is_supported_matmul = [](const std::shared_ptr& n) -> bool { @@ -209,25 +197,6 @@ auto has_supported_in_out(const std::shared_ptr &n) -> bool { return std::all_of(inputs.begin(), inputs.end(), [&](const Input& in) {return supported(in.get_tensor());}) && std::all_of(outputs.begin(), outputs.end(), [&](const Output& out) {return supported(out.get_tensor());}); } - -auto has_result_child(const std::shared_ptr &node) -> bool { - for (const auto& child : node->get_users()) { - if (ov::is_type(child)) { - return true; - } - } - return false; -} - -auto get_num_result_children(const std::shared_ptr &node) -> size_t { - size_t result = 0; - for (const auto& child : node->get_users()) { - if (ov::is_type(child)) { - result++; - } - } - return result; -} } // namespace const std::set& ov::snippets::pass::TokenizeSnippets::get_supported_element_types() { @@ -249,14 +218,10 @@ bool TokenizeSnippets::AppropriateForSubgraph(const std::shared_ptr TokenizeSnippets::TokenizeSnippets(const SnippetsTokenization::Config& config) { MATCHER_SCOPE(TokenizeSnippets); - enum continuation_strategy { - reset, - abort - }; - continuation_strategy strategy = continuation_strategy::reset; - auto label = std::make_shared(ov::pass::pattern::any_input(), - [](const std::shared_ptr &n) { + auto label = ov::pass::pattern::any_input( + [](ov::Output out) { + const auto n = out.get_node_shared_ptr(); // todo: MatMul and Transpose ops are always skipped by the SnippetsMarkSkipped pass. // This is a temporary solution. Either modify SnippetsMarkSkipped // or align this with the custom MHA tokenization pass. 
@@ -270,395 +235,8 @@ TokenizeSnippets::TokenizeSnippets(const SnippetsTokenization::Config& config) { if (transformation_callback(node)) { return false; } - remark(1) << "Match root: " << node->get_friendly_name() << " " << node << std::endl; - - const auto getFusedNames = [](const std::shared_ptr& n) -> std::string { - auto rt_info = n->get_rt_info(); - auto it = rt_info.find("originalLayersNames"); - if (it != rt_info.end()) { - return it->second.as() + ","; - } - return ""; - }; - - auto create_single_node_subgraph = [&](const std::shared_ptr &node) { - auto subgraph = op::Subgraph::wrap_node_as_subgraph(node); - subgraph->get_rt_info()["originalLayersNames"] = getFusedNames(node) + node->get_friendly_name(); - ov::replace_node(node, subgraph); - op::update_out_tensor_name(subgraph); - }; - - auto abort_with_strategy = [&](const std::string& message_reset, - const std::string& message_abort = "", int priority = 3) { - if (strategy == continuation_strategy::reset) { - create_single_node_subgraph(node); - return true; - } else if (strategy == continuation_strategy::abort) { - if (!message_abort.empty()) { - remark(priority) << message_abort << std::endl; - } - } - return false; - }; - // inputs that are already subgraphs - std::unordered_set> input_subgraphs; - // clone bodies because we need a rollback if loop is found - std::map, std::shared_ptr> clones; - - ParameterVector body_parameters; - // inputs to merged subgraph - OutputVector external_inputs; - // inputs to the node before merge to subgraph - OutputVector internal_inputs; - // nodes whose rt_info should be copied into result subgraph - NodeVector replaced_nodes{node}; - - auto input_values = node->input_values(); - /* - * Called with subgraph->input_value(i) arg and used to - * Check that the attached node input subgraph has the same input as the node itself. - * If true, then ternary merge is initiated. - * input - * / \ - * subgraph--node - */ - auto is_recurrent = [&input_values](const ov::Output& to_find) -> bool { - return std::any_of(input_values.begin(), input_values.end(), - [&](const ov::Output &in) {return in == to_find;}); - }; - /* - * Checks if the passed node introduces loop dependency for given topological bounds (pair of maxParentOrder, minChildOrder). - * The bounds are presumed to be without dependency. The bounds are updated if no dependency is introduced by the node. 
- */ - const auto cyclicDependencyIsIntoduced = [&node](const std::shared_ptr& nodeToExamine, std::pair& currentBounds) -> bool { - assert(currentBounds.first < currentBounds.second && "Invalid currentBounds passed"); - const auto& parentNodes = ov::as_node_vector(nodeToExamine->input_values()); - const int64_t maxParentOrder = std::accumulate(parentNodes.begin(), parentNodes.end(), currentBounds.first, - [](int64_t maxOrder, std::shared_ptr n){ - if (ov::is_type(n) || ov::is_type(n)) - return maxOrder; - return std::max(maxOrder, GetTopologicalOrder(n)); - }); - const auto& childNodes = nodeToExamine->get_users(); - // Skip the node being attached, since it will be a part of subgraph and can't introduce loop dependency - const int64_t minChildOrder = std::accumulate(childNodes.begin(), childNodes.end(), currentBounds.second, - [&node](int64_t minOrder, std::shared_ptr n){ - if (ov::is_type(n) || n == node) - return minOrder; - return std::min(minOrder, GetTopologicalOrder(n)); - }); - if (maxParentOrder < minChildOrder) { - currentBounds = std::pair(maxParentOrder, minChildOrder); - return false; - } - return true; - }; - - for (const auto& input_node : ov::as_node_vector(input_values)) { - if (auto subgraph = ov::as_type_ptr(input_node)) { - if (!clones.count(input_node) && GetSnippetsSubgraphType(subgraph) != SnippetsSubgraphType::Completed) { - auto f = subgraph->body().clone(); - f->set_friendly_name(subgraph->body_ptr()->get_friendly_name()); - clones[input_node] = f; - } - } - } - // If there are no input subgraphs no need to go further, just create a new one. - if (clones.empty()) { - create_single_node_subgraph(node); - remark(1) << "Starting subgraph at: " << node->get_friendly_name() - << " with " << node->inputs().size() << " inputs and " << node->outputs().size() - << " outputs" << std::endl; - return true; - } - std::string subgraph_name = node->get_friendly_name(); - std::string fusedNames{}; - size_t num_result_children = 0; - std::pair currentTopoBounds {-1, LONG_MAX}; - cyclicDependencyIsIntoduced(node, currentTopoBounds); - assert(!cyclicDependencyIsIntoduced(node, currentTopoBounds) && "Cyclic dependency is introduced by the node itself"); - for (const auto& input_value : input_values) { - auto input_node = input_value.get_node_shared_ptr(); - if (ov::is_type(input_node) && - !cyclicDependencyIsIntoduced(input_node, currentTopoBounds)) { - auto subgraph = std::static_pointer_cast(input_node); - if (!input_subgraphs.count(input_node)) { - input_subgraphs.insert(input_node); - - fusedNames += getFusedNames(subgraph); - replaced_nodes.push_back(subgraph); - - if (has_result_child(subgraph)) { - // we set input subgraph name to the current subgraph - // in order to save node friendly name before result - subgraph_name = subgraph->get_friendly_name(); - num_result_children += 1; - } - auto f = clones[input_node]; - const auto& input_body_parameters = f->get_parameters(); - // Todo: - // Some of the input subgraphs might have common parents, so some of the input_parameters might already be - // in external_inputs and hence in body_parameters. Here we handle this case and remove repeated body_parameters. - // Would it be better to incorporate all inputs first and then remove repeated params. - for (size_t i = 0; i < input_body_parameters.size(); ++i) { - auto found = std::find(external_inputs.begin(), external_inputs.end(), subgraph->input_value(i)); - if (found != external_inputs.end()) { - // Todo: here we rely on friendly_name uniqueness. Propose a different algorithm. 
- size_t current_input_index = body_parameters.size(); - for (size_t p_ind = 0; p_ind < body_parameters.size(); p_ind++) { - const auto& p = body_parameters[p_ind]; - // unite two body parameters from two input subgraphs only if: - // 1. two input subgraphs are connected to the same parent node/subgraph, - // 2. and connected to the same output port of this parent node/subgraph. - if (p->get_friendly_name() == found->get_node_shared_ptr()->get_friendly_name() && - external_inputs[p_ind] == *found) { - current_input_index = p_ind; - break; - } - } - - if (current_input_index < body_parameters.size()) { - remark(13) << "replacing " << *found << " " << current_input_index << " with " - << body_parameters[current_input_index] << std::endl; - f->replace_parameter(i, body_parameters[current_input_index]); - } else { - external_inputs.push_back(subgraph->input_value(i)); - body_parameters.push_back(input_body_parameters[i]); - } - } else if (is_recurrent(subgraph->input_value(i))) { - remark(13) << "ternary merge is conducted " << subgraph->input_value(i).get_node_shared_ptr() << std::endl; - - auto internal = input_body_parameters[i]; - auto internal_consumers = internal->outputs(); - if (auto to_replace_with = ov::as_type_ptr(subgraph->get_input_node_shared_ptr(i))) { - // todo: In principle, we can still attach the node to the subgraph if cyclic dependency is introduced during ternary merge. - // Need to support. - if (cyclicDependencyIsIntoduced(to_replace_with, currentTopoBounds)) - return abort_with_strategy("Attempt to perform recurrent merge for cyclic-dependent subgraphs. Aborting."); - for (const auto& output : internal_consumers) { - for (auto consumer : output.get_target_inputs()) { - auto other_body = clones[subgraph->get_input_node_shared_ptr(i)]; - auto other_body_result = other_body->get_results()[consumer.get_source_output().get_index()]; - auto result_producer = other_body_result->input(0).get_source_output(); - - consumer.replace_source_output(result_producer.get_node_shared_ptr()); - } - } - } else { - external_inputs.push_back(subgraph->input_value(i)); - body_parameters.push_back(input_body_parameters[i]); - } - } else { - external_inputs.push_back(subgraph->input_value(i)); - body_parameters.push_back(input_body_parameters[i]); - } - } - } - - // this is there stitching happens, get result of a copy of a body of currently processed input and put it to the new inputs - // internal output index == external output index - auto& input_body = clones[input_node]; - size_t source_output_index = input_value.get_index(); - auto source_result = input_body->get_results()[source_output_index]; - - // We cannot add new node, that is not Convert, after Convert (that is start node) to avoid arithmetic problems with conversion - // We can add any new node in Subgraph after Convert (bacause after Input) - // Parameter - // | - // Convert - // - // We cannot add new node, that isn't Convert, in Subgraph after existing Convert - // Parameter - // Relu - // Convert - // - // But we can add new Convert in Subgraph after existing Convert - // Parameter - // Relu - // Convert - // Convert - // - // Thus, We can grow subgraph only if Convert is the first node of subgraph and have to abort it's the last one and we want to add not Convert - // We have this limitation because at the moment we support only one execution precision inside body, so - // if there is Convert with input and output data types that aren't equal to supported exec type, - // we can get conversion math errors - const auto 
output_of_subgraph = source_result->get_input_node_shared_ptr(0); - if (!ov::is_type(node) && ov::is_type(output_of_subgraph)) { - // Also we can add new node after < Parameter -> Convert -> Convert -> Convert > - auto grandparent = output_of_subgraph->get_input_node_ptr(0); - while (ov::is_type(grandparent)) { - grandparent = grandparent->get_input_node_ptr(0); - } - - if (!ov::is_type(grandparent)) { - return abort_with_strategy("Convert supports only as Input and as Result of subgraph. Aborting"); - } - } - // Result op has a single input - internal_inputs.push_back(source_result->input_value(0)); - } else { - // We need some non-scalar constants inside Subgraph in the following cases: - // [*] We have to save explicitly FQ Constants to call ConstantFolding after Tokenization. - // After ConstantFolding we will move remaining non-scalar Constants from body using ConvertConstantsToParameters pass - // [*] We support Transpose with second Constant input (represents order). This Constant will not be scheduled - // and will only be used to decompose Transpose into a proper Load, Store and Loop combination. - if (ov::is_type(input_node) && - (ov::shape_size(input_value.get_shape()) == 1 || - ov::is_type(node) || - op::Subgraph::constant_input_should_be_inside_body(node))) { - internal_inputs.push_back(input_node->output(0)); - } else { - external_inputs.push_back(input_value); - auto new_parameter = std::make_shared(input_value.get_element_type(), input_value.get_partial_shape()); - new_parameter->set_friendly_name(input_node->get_friendly_name()); - body_parameters.push_back(new_parameter); - internal_inputs.push_back(new_parameter->output(0)); - } - } - } - fusedNames += node->get_friendly_name(); - num_result_children += get_num_result_children(node); - if (num_result_children > 1) - return abort_with_strategy("New subgraph is created since too many Result children are detected"); - - auto body_node = node->copy_with_new_inputs(internal_inputs); - body_node->set_friendly_name(node->get_friendly_name()); - - remark(1) << "Original node outputs = " << node->get_output_size() - << " body node outputs = " << body_node->get_output_size() << std::endl; - - if (node->get_output_size() != body_node->get_output_size()) { - OPENVINO_THROW("original node outputs size and extracted node outputs size doesn't much"); - } - - // After some transformations, a different number of Constants for some operations may be created - // than the actual number of Constants during tokenization. - // To avoid unsupported number of non-scalar Constants in the future (plugin specific limitation) - // we should calculate potentional number of non-scalar Constants that will be moved up from body. 
- size_t hidden_data_count = 0; - if (const auto fq_node = ov::as_type_ptr(node)) { - hidden_data_count += ov::snippets::utils::get_non_scalar_constant_count_for_fq(fq_node); - } - - ResultVector body_results; - std::vector>> subgraph_result_inputs; - - ov::NodeVector ops_for_buffer_count; - for (auto subgraph : input_subgraphs) { - // we should summurize additional needed data count (non-scalar Constants and Buffers) from all input subgraphs - // because we will collapse them with our node and we should get total count - const auto subgraph_ptr = ov::as_type_ptr(subgraph); - hidden_data_count += subgraph_ptr->get_virtual_port_count(); - // Buffers can be existed only in Subgraphs with domain sensetive ops which - // requires intermediate memory for data repacking - // To avoid load time regressions, we verify only these Subgraph with domain sensetive ops - if (subgraph_ptr->has_domain_sensitive_ops()) { - const auto ops = subgraph_ptr->body_ptr()->get_ordered_ops(); - ops_for_buffer_count.insert(ops_for_buffer_count.end(), ops.begin(), ops.end()); - } - - for (auto output : subgraph->outputs()) { - bool first_side_consumer = true; - - for (auto target_input : output.get_target_inputs()) { - auto target_node = target_input.get_node()->shared_from_this(); - - if (input_subgraphs.count(target_node)) { - remark(13) << "ternary merge is conducted " << subgraph << " -> " << target_node << std::endl; - } - - if (!input_subgraphs.count(target_node) && target_node != node) { - if (first_side_consumer) { - auto& input_subgraph_body = clones[subgraph]; - body_results.push_back(std::make_shared( - input_subgraph_body->get_results()[output.get_index()]->input_value(0))); - subgraph_result_inputs.push_back({}); - - first_side_consumer = false; - } - - if (!!subgraph_result_inputs.back().count(target_input)) { - OPENVINO_THROW("target input added twice!!!"); - } - // save target input port outside the body - subgraph_result_inputs.back().insert(target_input); - } - } - } - } - - if (op::Subgraph::is_domain_sensitive_op(node)) { - ops_for_buffer_count.push_back(node); - } - - for (auto output : node->outputs()) { - body_results.push_back(std::make_shared(body_node->output(output.get_index()))); - subgraph_result_inputs.push_back(output.get_target_inputs()); - } - - if (body_results.size() != subgraph_result_inputs.size()) { - OPENVINO_THROW("body results and node results size mismatch during subgraph collaps"); - } - - // The each data node (Parameter (and non-Scalar Constants), Result, Buffers with the same ID) requires the own unique GPR. - // At the moment, CPU Plugin has limitation for GPR registers: there are 12 available GPRs, - // and one of them must be reserved for runtime parameters, so only 11 can be used during kernel execution. - // This limitation will be resolved once generator supports gprs spills [75622]. - // TODO [75567]: move this plugin-specific constraint to the plugin callback - const auto unique_buffer_count = op::Subgraph::get_estimated_buffer_count(ops_for_buffer_count); - const size_t max_data_ptr_count = config.get_data_ptr_gpr_count(); - if (body_parameters.size() + body_results.size() + hidden_data_count + unique_buffer_count > max_data_ptr_count) { - const std::string message_reset = "new subgraph is created. 
Impossible to schedule subgraph with " + - std::to_string(body_parameters.size()) + " inputs, " + std::to_string(body_results.size()) + " outputs and " + - std::to_string(hidden_data_count) + " non-scalar constants and " + std::to_string(unique_buffer_count) + "buffers."; - const std::string message_abort = "failed to continue subgraph. Impossible to schedule subgraph with " + - std::to_string(body_parameters.size()) + " inputs, " + std::to_string(body_results.size()) + " outputs and " + - std::to_string(hidden_data_count) + " non-scalar constants and " + std::to_string(unique_buffer_count) + "buffers."; - return abort_with_strategy(message_reset, message_abort); - } - - auto body = op::create_body(node->get_friendly_name(), body_results, body_parameters); - for (size_t i = 0; i < body->get_parameters().size(); i++) { - body->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name()); - } - auto subgraph = op::build_subgraph(node, external_inputs, body, subgraph_name); - copy_runtime_info(replaced_nodes, subgraph); - const auto& act_body = subgraph->body(); - for (size_t i = 0; i < act_body.get_parameters().size(); i++) { - act_body.get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name()); - } - - if (subgraph->get_output_size() != subgraph_result_inputs.size()) { - OPENVINO_THROW("newly create subgraph doesn't much number of results"); - } - - if (outputs_are_not_broadcastable(subgraph)) - return abort_with_strategy("New subgraph is created due to outputs of a subgraph not broadcastable."); - - for (size_t i = 0; i < subgraph->get_output_size(); ++i) { - for (auto target_input : subgraph_result_inputs[i]) { - target_input.replace_source_output(subgraph->output(i)); - } - } - op::update_out_tensor_name(subgraph); - - subgraph->validate_and_infer_types(); - - const auto& act_body1 = subgraph->body(); - for (size_t i = 0; i < act_body1.get_parameters().size(); i++) { - act_body1.get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name()); - } - subgraph->get_rt_info()["originalLayersNames"] = fusedNames; - subgraph->set_virtual_port_count(hidden_data_count); - - remark(1) << "Replacement (merge) done for: " - << subgraph->get_friendly_name() - << " with " << subgraph->inputs().size() - << " inputs and " << subgraph->outputs().size() - << " outputs and " << subgraph->body_ptr()->get_ops().size() << " ops total\n"; - - return true; + return ov::snippets::utils::tokenize_node(node, config); }; auto matcher = std::make_shared(label, matcher_name); register_matcher(matcher, callback); diff --git a/src/common/snippets/src/pass/fc_tokenization.cpp b/src/common/snippets/src/pass/fc_tokenization.cpp new file mode 100644 index 00000000000000..a75bf0a4fed135 --- /dev/null +++ b/src/common/snippets/src/pass/fc_tokenization.cpp @@ -0,0 +1,29 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/fc_tokenization.hpp" + +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "snippets/itt.hpp" +#include "snippets/utils/tokenization_utils.hpp" + +ov::snippets::pass::TokenizeFCSnippets::TokenizeFCSnippets(const SnippetsTokenization::Config& config) { + MATCHER_SCOPE(TokenizeFCSnippets); + // TODO: extend constant path coverage + // Ticket: 153480 + auto constant = ov::pass::pattern::wrap_type(); + auto m_matmul = ov::pass::pattern::wrap_type({ov::pass::pattern::any_input(), constant}); + + auto callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher &m) { + 
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::TokenizeFCSnippets") + const auto matmul = m.get_match_root(); + if (transformation_callback(matmul)) { + return false; + } + return ov::snippets::utils::tokenize_node(matmul, config); + }; + + auto matcher = std::make_shared(m_matmul, matcher_name); + register_matcher(matcher, callback); +} diff --git a/src/common/snippets/src/pass/tokenization.cpp b/src/common/snippets/src/pass/tokenization.cpp index 43733fc196ee83..e472caa1ed215f 100644 --- a/src/common/snippets/src/pass/tokenization.cpp +++ b/src/common/snippets/src/pass/tokenization.cpp @@ -2,16 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/itt.hpp" +#include "snippets/pass/tokenization.hpp" +#include "openvino/pass/graph_rewrite.hpp" #include "openvino/pass/manager.hpp" -#include "snippets/pass/tokenization.hpp" +#include "snippets/itt.hpp" +#include "snippets/pass/collapse_subgraph.hpp" #include "snippets/pass/common_optimizations.hpp" #include "snippets/pass/extract_reshapes_from_mha.hpp" -#include "snippets/pass/mha_tokenization.hpp" +#include "snippets/pass/fc_tokenization.hpp" #include "snippets/pass/gn_tokenization.hpp" -#include "snippets/pass/collapse_subgraph.hpp" - +#include "snippets/pass/mha_tokenization.hpp" namespace ov { namespace snippets { @@ -81,9 +82,16 @@ bool SnippetsTokenization::run_on_model(const std::shared_ptr& m) { manager.register_pass(); manager.register_pass(); + // This pass mustn't be registered in GraphRewrite with other tokenization passes because of 2 reasons: + // 1. It has higher priority than other tokenization passes + // 2. It changes the nodes after the matched root node manager.register_pass(m_config); - manager.register_pass(); - manager.register_pass(m_config); + + auto tokenization_passes = manager.register_pass(); + tokenization_passes->add_matcher(); + tokenization_passes->add_matcher(m_config); + tokenization_passes->add_matcher(m_config); + manager.register_pass(m_config); manager.run_passes(m); diff --git a/src/common/snippets/src/shape_inference/shape_infer_instances.cpp b/src/common/snippets/src/shape_inference/shape_infer_instances.cpp index c456b6e2ba0254..a3e3d9652c0ac8 100644 --- a/src/common/snippets/src/shape_inference/shape_infer_instances.cpp +++ b/src/common/snippets/src/shape_inference/shape_infer_instances.cpp @@ -197,16 +197,9 @@ Result BrgemmShapeInfer::infer(const std::vector& input_shapes) { size_t max_rank = arg0_shape_tmp.size(); VectorDims output_shape(max_rank); for (size_t i = 0; i < max_rank - 2; ++i) { - if (arg0_shape_tmp[i] == arg1_shape_tmp[i]) { - output_shape[i] = arg0_shape_tmp[i]; - } else { - if (arg0_shape_tmp[i] == 1 || utils::is_dynamic_value(arg0_shape_tmp[i])) - output_shape[i] = arg1_shape_tmp[i]; - else if (arg1_shape_tmp[i] == 1 || utils::is_dynamic_value(arg1_shape_tmp[i])) - output_shape[i] = arg0_shape_tmp[i]; - else - OPENVINO_THROW("Incompatible Brgemm batch dimension"); - } + if (!utils::broadcast_merge_dim(output_shape[i], arg0_shape_tmp[i], arg1_shape_tmp[i])) + OPENVINO_THROW("Incompatible MatMul batch dimension. 
Can't merge dim ", arg0_shape_tmp[i], + " with dim ", arg1_shape_tmp[i], " at index=", i); } output_shape[output_shape.size() - 2] = arg0_shape_tmp[arg0_shape_tmp.size() - 2]; // M output_shape[output_shape.size() - 1] = arg1_shape_tmp[arg1_shape_tmp.size() - 1]; // N diff --git a/src/common/snippets/src/utils/tokenization_utils.cpp b/src/common/snippets/src/utils/tokenization_utils.cpp new file mode 100644 index 00000000000000..700b282f86f4d4 --- /dev/null +++ b/src/common/snippets/src/utils/tokenization_utils.cpp @@ -0,0 +1,432 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include "snippets/utils/tokenization_utils.hpp" +#include "snippets/remarks.hpp" + +namespace ov { +namespace snippets { +namespace utils { + +using namespace ov::snippets::op; +using namespace ov::snippets::pass; + +namespace { +auto has_result_child(const std::shared_ptr &node) -> bool { + for (const auto& child : node->get_users()) { + if (ov::is_type(child)) { + return true; + } + } + return false; +} + +auto get_num_result_children(const std::shared_ptr &node) -> size_t { + size_t result = 0; + for (const auto& child : node->get_users()) { + if (ov::is_type(child)) { + result++; + } + } + return result; +} + +auto outputs_are_not_broadcastable(const std::shared_ptr& node) -> bool { + const auto& outputs = node->outputs(); + if (outputs.size() <= 1) + return false; + ov::PartialShape ref_shape = outputs.front().get_partial_shape(); + bool success = true; + for (size_t i = 1; i < outputs.size() && success; i++) { + success &= ov::PartialShape::broadcast_merge_into(ref_shape, outputs[i].get_partial_shape(), ov::op::AutoBroadcastType::NUMPY); + } + return !success; +} +} // namespace + +bool tokenize_node(const std::shared_ptr& node, const SnippetsTokenization::Config& config) { + const auto getFusedNames = [](const std::shared_ptr& n) -> std::string { + auto rt_info = n->get_rt_info(); + auto it = rt_info.find("originalLayersNames"); + if (it != rt_info.end()) { + return it->second.as() + ","; + } + return ""; + }; + + auto create_single_node_subgraph = [&](const std::shared_ptr &node) { + auto subgraph = op::Subgraph::wrap_node_as_subgraph(node); + subgraph->get_rt_info()["originalLayersNames"] = getFusedNames(node) + node->get_friendly_name(); + ov::replace_node(node, subgraph); + op::update_out_tensor_name(subgraph); + }; + + auto abort = [&](const std::string& message) { + remark(3) << message << std::endl; + create_single_node_subgraph(node); + return true; + }; + // inputs that are already subgraphs + std::unordered_set> input_subgraphs; + // clone bodies because we need a rollback if loop is found + std::map, std::shared_ptr> clones; + + ParameterVector body_parameters; + // inputs to merged subgraph + OutputVector external_inputs; + // inputs to the node before merge to subgraph + OutputVector internal_inputs; + // nodes whose rt_info should be copied into result subgraph + NodeVector replaced_nodes{node}; + + auto input_values = node->input_values(); + /* + * Called with subgraph->input_value(i) arg and used to + * Check that the attached node input subgraph has the same input as the node itself. + * If true, then ternary merge is initiated. 
+ * input + * / \ + * subgraph--node + */ + auto is_recurrent = [&input_values](const ov::Output& to_find) -> bool { + return std::any_of(input_values.begin(), input_values.end(), + [&](const ov::Output &in) {return in == to_find;}); + }; + /* + * Checks if the passed node introduces loop dependency for given topological bounds (pair of maxParentOrder, minChildOrder). + * The bounds are presumed to be without dependency. The bounds are updated if no dependency is introduced by the node. + */ + const auto cyclicDependencyIsIntoduced = [&node](const std::shared_ptr& nodeToExamine, std::pair& currentBounds) -> bool { + assert(currentBounds.first < currentBounds.second && "Invalid currentBounds passed"); + const auto& parentNodes = ov::as_node_vector(nodeToExamine->input_values()); + const int64_t maxParentOrder = std::accumulate(parentNodes.begin(), parentNodes.end(), currentBounds.first, + [](int64_t maxOrder, std::shared_ptr n){ + if (ov::is_type(n) || ov::is_type(n)) + return maxOrder; + return std::max(maxOrder, GetTopologicalOrder(n)); + }); + const auto& childNodes = nodeToExamine->get_users(); + // Skip the node being attached, since it will be a part of subgraph and can't introduce loop dependency + const int64_t minChildOrder = std::accumulate(childNodes.begin(), childNodes.end(), currentBounds.second, + [&node](int64_t minOrder, std::shared_ptr n){ + if (ov::is_type(n) || n == node) + return minOrder; + return std::min(minOrder, GetTopologicalOrder(n)); + }); + if (maxParentOrder < minChildOrder) { + currentBounds = std::pair(maxParentOrder, minChildOrder); + return false; + } + return true; + }; + + for (const auto& input_node : ov::as_node_vector(input_values)) { + if (auto subgraph = ov::as_type_ptr(input_node)) { + if (!clones.count(input_node) && GetSnippetsSubgraphType(subgraph) != SnippetsSubgraphType::Completed) { + auto f = subgraph->body().clone(); + f->set_friendly_name(subgraph->body_ptr()->get_friendly_name()); + clones[input_node] = f; + } + } + } + // If there are no input subgraphs no need to go further, just create a new one. 
+ if (clones.empty()) { + create_single_node_subgraph(node); + remark(1) << "Starting subgraph at: " << node->get_friendly_name() + << " with " << node->inputs().size() << " inputs and " << node->outputs().size() + << " outputs" << std::endl; + return true; + } + std::string subgraph_name = node->get_friendly_name(); + std::string fusedNames{}; + size_t num_result_children = 0; + std::pair currentTopoBounds {-1, LONG_MAX}; + cyclicDependencyIsIntoduced(node, currentTopoBounds); + assert(!cyclicDependencyIsIntoduced(node, currentTopoBounds) && "Cyclic dependency is introduced by the node itself"); + for (const auto& input_value : input_values) { + auto input_node = input_value.get_node_shared_ptr(); + if (ov::is_type(input_node) && + !cyclicDependencyIsIntoduced(input_node, currentTopoBounds)) { + auto subgraph = std::static_pointer_cast(input_node); + if (!input_subgraphs.count(input_node)) { + input_subgraphs.insert(input_node); + + fusedNames += getFusedNames(subgraph); + replaced_nodes.push_back(subgraph); + + if (has_result_child(subgraph)) { + // we set input subgraph name to the current subgraph + // in order to save node friendly name before result + subgraph_name = subgraph->get_friendly_name(); + num_result_children += 1; + } + auto f = clones[input_node]; + const auto& input_body_parameters = f->get_parameters(); + // Todo: + // Some of the input subgraphs might have common parents, so some of the input_parameters might already be + // in external_inputs and hence in body_parameters. Here we handle this case and remove repeated body_parameters. + // Would it be better to incorporate all inputs first and then remove repeated params. + for (size_t i = 0; i < input_body_parameters.size(); ++i) { + auto found = std::find(external_inputs.begin(), external_inputs.end(), subgraph->input_value(i)); + if (found != external_inputs.end()) { + // Todo: here we rely on friendly_name uniqueness. Propose a different algorithm. + size_t current_input_index = body_parameters.size(); + for (size_t p_ind = 0; p_ind < body_parameters.size(); p_ind++) { + const auto& p = body_parameters[p_ind]; + // unite two body parameters from two input subgraphs only if: + // 1. two input subgraphs are connected to the same parent node/subgraph, + // 2. and connected to the same output port of this parent node/subgraph. + if (p->get_friendly_name() == found->get_node_shared_ptr()->get_friendly_name() && + external_inputs[p_ind] == *found) { + current_input_index = p_ind; + break; + } + } + + if (current_input_index < body_parameters.size()) { + remark(13) << "replacing " << *found << " " << current_input_index << " with " + << body_parameters[current_input_index] << std::endl; + f->replace_parameter(i, body_parameters[current_input_index]); + } else { + external_inputs.push_back(subgraph->input_value(i)); + body_parameters.push_back(input_body_parameters[i]); + } + } else if (is_recurrent(subgraph->input_value(i))) { + remark(13) << "ternary merge is conducted " << subgraph->input_value(i).get_node_shared_ptr() << std::endl; + + auto internal = input_body_parameters[i]; + auto internal_consumers = internal->outputs(); + if (auto to_replace_with = ov::as_type_ptr(subgraph->get_input_node_shared_ptr(i))) { + // todo: In principle, we can still attach the node to the subgraph if cyclic dependency is introduced during ternary merge. + // Need to support. + if (cyclicDependencyIsIntoduced(to_replace_with, currentTopoBounds)) + return abort("Attempt to perform recurrent merge for cyclic-dependent subgraphs. 
Aborting."); + for (const auto& output : internal_consumers) { + for (auto consumer : output.get_target_inputs()) { + auto other_body = clones[subgraph->get_input_node_shared_ptr(i)]; + auto other_body_result = other_body->get_results()[consumer.get_source_output().get_index()]; + auto result_producer = other_body_result->input(0).get_source_output(); + + consumer.replace_source_output(result_producer.get_node_shared_ptr()); + } + } + } else { + external_inputs.push_back(subgraph->input_value(i)); + body_parameters.push_back(input_body_parameters[i]); + } + } else { + external_inputs.push_back(subgraph->input_value(i)); + body_parameters.push_back(input_body_parameters[i]); + } + } + } + + // this is there stitching happens, get result of a copy of a body of currently processed input and put it to the new inputs + // internal output index == external output index + auto& input_body = clones[input_node]; + size_t source_output_index = input_value.get_index(); + auto source_result = input_body->get_results()[source_output_index]; + + // We cannot add new node, that is not Convert, after Convert (that is start node) to avoid arithmetic problems with conversion + // We can add any new node in Subgraph after Convert (bacause after Input) + // Parameter + // | + // Convert + // + // We cannot add new node, that isn't Convert, in Subgraph after existing Convert + // Parameter + // Relu + // Convert + // + // But we can add new Convert in Subgraph after existing Convert + // Parameter + // Relu + // Convert + // Convert + // + // Thus, We can grow subgraph only if Convert is the first node of subgraph and have to abort it's the last one and we want to add not Convert + // We have this limitation because at the moment we support only one execution precision inside body, so + // if there is Convert with input and output data types that aren't equal to supported exec type, + // we can get conversion math errors + const auto output_of_subgraph = source_result->get_input_node_shared_ptr(0); + if (!ov::is_type(node) && ov::is_type(output_of_subgraph)) { + // Also we can add new node after < Parameter -> Convert -> Convert -> Convert > + auto grandparent = output_of_subgraph->get_input_node_ptr(0); + while (ov::is_type(grandparent)) { + grandparent = grandparent->get_input_node_ptr(0); + } + + if (!ov::is_type(grandparent)) { + return abort("Convert supports only as Input and as Result of subgraph. Aborting"); + } + } + // Result op has a single input + internal_inputs.push_back(source_result->input_value(0)); + } else { + // We need some non-scalar constants inside Subgraph in the following cases: + // [*] We have to save explicitly FQ Constants to call ConstantFolding after Tokenization. + // After ConstantFolding we will move remaining non-scalar Constants from body using ConvertConstantsToParameters pass + // [*] We support Transpose with second Constant input (represents order). This Constant will not be scheduled + // and will only be used to decompose Transpose into a proper Load, Store and Loop combination. 
+ if (ov::is_type(input_node) && + (ov::shape_size(input_value.get_shape()) == 1 || + ov::is_type(node) || + op::Subgraph::constant_input_should_be_inside_body(node))) { + internal_inputs.push_back(input_node->output(0)); + } else { + external_inputs.push_back(input_value); + auto new_parameter = std::make_shared(input_value.get_element_type(), input_value.get_partial_shape()); + new_parameter->set_friendly_name(input_node->get_friendly_name()); + body_parameters.push_back(new_parameter); + internal_inputs.push_back(new_parameter->output(0)); + } + } + } + fusedNames += node->get_friendly_name(); + num_result_children += get_num_result_children(node); + if (num_result_children > 1) + return abort("New subgraph is created since too many Result children are detected"); + + auto body_node = node->copy_with_new_inputs(internal_inputs); + body_node->set_friendly_name(node->get_friendly_name()); + + remark(1) << "Original node outputs = " << node->get_output_size() + << " body node outputs = " << body_node->get_output_size() << std::endl; + + if (node->get_output_size() != body_node->get_output_size()) { + OPENVINO_THROW("original node outputs size and extracted node outputs size doesn't much"); + } + + // After some transformations, a different number of Constants for some operations may be created + // than the actual number of Constants during tokenization. + // To avoid unsupported number of non-scalar Constants in the future (plugin specific limitation) + // we should calculate potentional number of non-scalar Constants that will be moved up from body. + size_t hidden_data_count = 0; + if (const auto fq_node = ov::as_type_ptr(node)) { + hidden_data_count += ov::snippets::utils::get_non_scalar_constant_count_for_fq(fq_node); + } + + ResultVector body_results; + std::vector>> subgraph_result_inputs; + + ov::NodeVector ops_for_buffer_count; + for (auto subgraph : input_subgraphs) { + // we should summurize additional needed data count (non-scalar Constants and Buffers) from all input subgraphs + // because we will collapse them with our node and we should get total count + const auto subgraph_ptr = ov::as_type_ptr(subgraph); + hidden_data_count += subgraph_ptr->get_virtual_port_count(); + // Buffers can be existed only in Subgraphs with domain sensetive ops which + // requires intermediate memory for data repacking + // To avoid load time regressions, we verify only these Subgraph with domain sensetive ops + if (subgraph_ptr->has_domain_sensitive_ops()) { + const auto ops = subgraph_ptr->body_ptr()->get_ordered_ops(); + ops_for_buffer_count.insert(ops_for_buffer_count.end(), ops.begin(), ops.end()); + } + + for (auto output : subgraph->outputs()) { + bool first_side_consumer = true; + + for (auto target_input : output.get_target_inputs()) { + auto target_node = target_input.get_node()->shared_from_this(); + + if (input_subgraphs.count(target_node)) { + remark(13) << "ternary merge is conducted " << subgraph << " -> " << target_node << std::endl; + } + + if (!input_subgraphs.count(target_node) && target_node != node) { + if (first_side_consumer) { + auto& input_subgraph_body = clones[subgraph]; + body_results.push_back(std::make_shared( + input_subgraph_body->get_results()[output.get_index()]->input_value(0))); + subgraph_result_inputs.push_back({}); + + first_side_consumer = false; + } + + if (!!subgraph_result_inputs.back().count(target_input)) { + OPENVINO_THROW("target input added twice!!!"); + } + // save target input port outside the body + 
subgraph_result_inputs.back().insert(target_input); + } + } + } + } + + if (op::Subgraph::is_domain_sensitive_op(node)) { + ops_for_buffer_count.push_back(node); + } + + for (auto output : node->outputs()) { + body_results.push_back(std::make_shared(body_node->output(output.get_index()))); + subgraph_result_inputs.push_back(output.get_target_inputs()); + } + + if (body_results.size() != subgraph_result_inputs.size()) { + OPENVINO_THROW("body results and node results size mismatch during subgraph collaps"); + } + + // The each data node (Parameter (and non-Scalar Constants), Result, Buffers with the same ID) requires the own unique GPR. + // At the moment, CPU Plugin has limitation for GPR registers: there are 12 available GPRs, + // and one of them must be reserved for runtime parameters, so only 11 can be used during kernel execution. + // This limitation will be resolved once generator supports gprs spills [75622]. + // TODO [75567]: move this plugin-specific constraint to the plugin callback + const auto unique_buffer_count = op::Subgraph::get_estimated_buffer_count(ops_for_buffer_count); + const size_t max_data_ptr_count = config.get_data_ptr_gpr_count(); + if (body_parameters.size() + body_results.size() + hidden_data_count + unique_buffer_count > max_data_ptr_count) { + const std::string message_reset = "new subgraph is created. Impossible to schedule subgraph with " + + std::to_string(body_parameters.size()) + " inputs, " + std::to_string(body_results.size()) + " outputs and " + + std::to_string(hidden_data_count) + " non-scalar constants and " + std::to_string(unique_buffer_count) + "buffers."; + return abort(message_reset); + } + + auto body = op::create_body(node->get_friendly_name(), body_results, body_parameters); + for (size_t i = 0; i < body->get_parameters().size(); i++) { + body->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name()); + } + auto subgraph = op::build_subgraph(node, external_inputs, body, subgraph_name); + copy_runtime_info(replaced_nodes, subgraph); + const auto& act_body = subgraph->body(); + for (size_t i = 0; i < act_body.get_parameters().size(); i++) { + act_body.get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name()); + } + + if (subgraph->get_output_size() != subgraph_result_inputs.size()) { + OPENVINO_THROW("newly create subgraph doesn't much number of results"); + } + + if (outputs_are_not_broadcastable(subgraph)) + return abort("New subgraph is created due to outputs of a subgraph not broadcastable."); + + for (size_t i = 0; i < subgraph->get_output_size(); ++i) { + for (auto target_input : subgraph_result_inputs[i]) { + target_input.replace_source_output(subgraph->output(i)); + } + } + op::update_out_tensor_name(subgraph); + + subgraph->validate_and_infer_types(); + + const auto& act_body1 = subgraph->body(); + for (size_t i = 0; i < act_body1.get_parameters().size(); i++) { + act_body1.get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name()); + } + subgraph->get_rt_info()["originalLayersNames"] = fusedNames; + subgraph->set_virtual_port_count(hidden_data_count); + + remark(1) << "Replacement (merge) done for: " + << subgraph->get_friendly_name() + << " with " << subgraph->inputs().size() + << " inputs and " << subgraph->outputs().size() + << " outputs and " << subgraph->body_ptr()->get_ops().size() << " ops total\n"; + + return true; +} + +} // namespace utils +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git 
a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 38649b2906e9e3..5d76e85f2e8364 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -143,6 +143,7 @@ // Snippets #include "snippets/pass/tokenization.hpp" #include "snippets/pass/mha_tokenization.hpp" +#include "snippets/pass/fc_tokenization.hpp" #include "snippets/pass/collapse_subgraph.hpp" #include "snippets/pass/common_optimizations.hpp" #include "snippets/pass/split_dimension_m.hpp" @@ -932,6 +933,7 @@ void Transformations::MainSnippets(void) { #else CPU_REGISTER_PASS_X64(snippetsManager, SnippetsMarkSkipped, inferencePrecision == ov::element::bf16); #endif + CPU_DISABLE_PASS_COMMON(snippetsManager, snippets::pass::TokenizeFCSnippets); } CPU_REGISTER_PASS_X64(snippetsManager, snippets::pass::SnippetsTokenization, tokenization_config); // [126738] Remove precision constraint when Convert emitters are implemented on arm platform diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index dd6cc034cf31b4..1c7fd22e018eb6 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -578,14 +578,14 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(.*smoke_Snippets_AddSoftmax.*)"); retVector.emplace_back(R"(.*smoke_Snippets_TransposeSoftmaxEltwise.*)"); // Low-precision Matmuls are not supported by TPP yet - retVector.emplace_back(R"(.*smoke_Snippets_MatMulFQ.*)"); - retVector.emplace_back(R"(.*smoke_Snippets_MatMulBiasQuantized.*)"); - retVector.emplace_back(R"(.*smoke_Snippets_MatMulQuantized.*)"); - retVector.emplace_back(R"(.*smoke_Snippets_MatMulQuantizedSoftmax.*)"); - retVector.emplace_back(R"(.*smoke_Snippets_MHAINT8MatMul.*)"); - retVector.emplace_back(R"(.*smoke_Snippets_MHAQuantMatMul0.*)"); - retVector.emplace_back(R"(.*smoke_Snippets_MHAFQ.*)"); - retVector.emplace_back(R"(.*smoke_Snippets_PrecisionPropagation_Convertion.*)"); + retVector.emplace_back(R"(.*smoke_Snippets.*MatMulFQ.*)"); + retVector.emplace_back(R"(.*smoke_Snippets.*MatMulBiasQuantized.*)"); + retVector.emplace_back(R"(.*smoke_Snippets.*MatMulsQuantized.*)"); + retVector.emplace_back(R"(.*smoke_Snippets.*MatMulsQuantizedSoftmax.*)"); + retVector.emplace_back(R"(.*smoke_Snippets.*MHAINT8MatMul.*)"); + retVector.emplace_back(R"(.*smoke_Snippets.*MHAQuantMatMul0.*)"); + retVector.emplace_back(R"(.*smoke_Snippets.*MHAFQ.*)"); + retVector.emplace_back(R"(.*smoke_Snippets.*PrecisionPropagation_Convertion.*)"); retVector.emplace_back(R"(.*smoke_MHAQuant.*)"); #endif diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fully_connected.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fully_connected.cpp new file mode 100644 index 00000000000000..572621e6644fde --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fully_connected.cpp @@ -0,0 +1,173 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/matmul.hpp" + +#include "common_test_utils/test_constants.hpp" +#include "openvino/runtime/system_conf.hpp" + +namespace ov { +namespace test { 
+namespace snippets { +namespace { +static inline std::vector> quantized_precisions() { + std::vector> prc = {}; + // In Snippets MatMul INT8 is supported only on VNNI/AMX platforms + if (ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8()) { + prc.emplace_back(std::vector{element::i8, element::i8}); + prc.emplace_back(std::vector{element::u8, element::i8}); + } + return prc; +} + +static inline std::vector> precisions(bool only_fp32 = true) { + std::vector> prc = { + {element::f32, element::f32}, + }; +// Note: TPP doesn't support low precisions yet +#ifndef SNIPPETS_LIBXSMM_TPP + if (!only_fp32) { + auto quant = quantized_precisions(); + std::copy(quant.begin(), quant.end(), std::back_inserter(prc)); + // In Snippets MatMul BF16 is supported only on bf16/AMX platforms + if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) { + prc.emplace_back(std::vector{element::bf16, element::bf16}); + } + } +#endif + return prc; +} + +std::vector> fc_input_shapes{ + { + {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}}}, + {{}, {{2500, 256}}} + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnected, MatMul, + ::testing::Combine( + ::testing::ValuesIn(fc_input_shapes), + ::testing::ValuesIn(precisions(false)), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // MatMul + ::testing::Values(1), // Tokenized MatMul + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedFQ, MatMulFQ, + ::testing::Combine( + ::testing::ValuesIn(fc_input_shapes), + ::testing::ValuesIn(precisions()), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // MatMul; + ::testing::Values(1), // Tokenized MatMul + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedEltwiseChain, MatMulEltwiseChain, + ::testing::Combine( + ::testing::ValuesIn(fc_input_shapes), + ::testing::ValuesIn(precisions()), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // MatMul + ::testing::Values(1), // Tokenized MatMul + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +std::vector> fc_cascade_shapes{ + { + {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}, {2, 1, 32, 2500}}}, + {PartialShape{}, {{2500, 128}}}, + {PartialShape{}, {{128, 64}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedEltwiseChainCascade, MatMulEltwiseChainCascade, + ::testing::Combine( + ::testing::ValuesIn(fc_cascade_shapes), + ::testing::ValuesIn(precisions()), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +std::vector> fc_transpose_b_shapes{ + { + {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}}}, + {{}, {{256, 2500}}} + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedTransposeB, MatMulTransposeB, + ::testing::Combine( + ::testing::ValuesIn(fc_transpose_b_shapes), + ::testing::ValuesIn(precisions(false)), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // MatMul + ::testing::Values(1), // Tokenized MatMul + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + + +std::vector> fc_bias_shapes{ + { + {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}}}, + {{}, {{2500, 256}}}, + 
{PartialShape{-1, -1, -1, 256}, {{1, 1, 32, 256}, {1, 1, 80, 256}}} + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedBias, MatMulBias, + ::testing::Combine( + ::testing::ValuesIn(fc_bias_shapes), + ::testing::ValuesIn(precisions(false)), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // Subgraph; + ::testing::Values(1), // Tokenized MatMul+Bias + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedBiasQuantized, MatMulBiasQuantized, + ::testing::Combine( + ::testing::ValuesIn(fc_bias_shapes), + ::testing::ValuesIn(quantized_precisions()), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // Subgraph + ::testing::Values(1), // Tokenized MatMul+Bias + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +std::vector> fc_quantized_shapes{ + { + {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}}}, + {{}, {{2500, 256}}}, + {{}, {{256, 64}}} + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedsQuantized, MatMulsQuantized, + ::testing::Combine( + ::testing::ValuesIn(fc_quantized_shapes), + ::testing::ValuesIn(quantized_precisions()), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // Reshape on weights is folded => only 1 Subgraph remains + ::testing::Values(1), // Tokenized [MatMul+FQ+Matmul] + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedsQuantizedSoftmax, MatMulsQuantizedSoftmax, + ::testing::Combine( + ::testing::ValuesIn(fc_quantized_shapes), + ::testing::ValuesIn(quantized_precisions()), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // Reshape on weights is folded => only 1 Subgraph remains + ::testing::Values(1), // Tokenized [MatMul+FQ+Matmul] + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp index b800d1a2824b43..b0e8d58da2f0b2 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp @@ -59,6 +59,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, MatMul, ::testing::Combine( ::testing::ValuesIn(input_shapes), ::testing::ValuesIn(precisions(false)), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // MatMul ::testing::Values(1), // Tokenized MatMul ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -104,6 +105,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMatMul, MatMul, ::testing::Combine( ::testing::ValuesIn(input_shapes_dynamic), ::testing::ValuesIn(precisions(true)), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // MatMul ::testing::Values(1), // Tokenized MatMul ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -113,11 +115,40 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulFQ, MatMulFQ, ::testing::Combine( ::testing::ValuesIn(input_shapes), ::testing::ValuesIn(precisions()), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // MatMul; ::testing::Values(1), // Tokenized MatMul ::testing::Values(ov::test::utils::DEVICE_CPU)), MatMul::getTestCaseName); 
+INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulEltwiseChain, MatMulEltwiseChain, + ::testing::Combine( + ::testing::ValuesIn(input_shapes), + ::testing::ValuesIn(precisions()), + ::testing::Values(MatMulType::MatMul), + ::testing::Values(1), // MatMul + ::testing::Values(1), // Tokenized MatMul + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +std::vector> matmul_cascade_shapes{ + { + {PartialShape{-1, -1, -1, -1}, {{2, 1, 32, 2500}, {1, 3, 80, 700}, {2, 1, 32, 2500}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 2, 2500, 128}, {1, 3, 700, 150}, {1, 2, 2500, 128}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 1, 128, 64}, {1, 3, 150, 128}, {1, 1, 128, 64}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulEltwiseChainCascade, MatMulEltwiseChainCascade, + ::testing::Combine( + ::testing::ValuesIn(matmul_cascade_shapes), + ::testing::ValuesIn(precisions()), + ::testing::Values(MatMulType::MatMul), + ::testing::Values(1), // MatMul + ::testing::Values(1), // Tokenized MatMul + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + const auto& transpose_b_shapes = STATIC_SHAPES( {{3, 3, 64, 64}, {3, 3, 64, 64}}, {{1, 1, 32, 128}, {1, 1, 64, 128}}, @@ -131,6 +162,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulTransposeB, MatMulTransposeB, ::testing::Combine( ::testing::ValuesIn(transpose_b_shapes), ::testing::ValuesIn(precisions(false)), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // MatMul ::testing::Values(1), // Tokenized MatMul ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -141,6 +173,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBias, MatMulBias, ::testing::ValuesIn(STATIC_SHAPES({{1, 2, 69, 43}, {2, 1, 43, 49}, {1, 1, 69, 49}}, {{1, 2, 95, 1023}, {1, 2, 1023, 255}, {1, 2, 95, 255}})), ::testing::ValuesIn(precisions(false)), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // Subgraph; ::testing::Values(1), // Tokenized MatMul+Bias ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -162,6 +195,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMatMulBias, MatMulBias, ::testing::Combine( ::testing::ValuesIn(input_shapes_dynamic_bias), ::testing::ValuesIn(precisions(true)), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // Subgraph; ::testing::Values(1), // Tokenized MatMul+Bias ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -172,29 +206,31 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBiasQuantized, MatMulBiasQuantized ::testing::ValuesIn(STATIC_SHAPES({{1, 2, 69, 43}, {2, 1, 43, 49}, {1, 2, 1, 1}}, {{1, 2, 69, 43}, {2, 1, 43, 49}, {1, 2, 69, 49}})), ::testing::ValuesIn(quantized_precisions()), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // Subgraph ::testing::Values(1), // Tokenized MatMul+Bias ::testing::Values(ov::test::utils::DEVICE_CPU)), MatMul::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulQuantized, MatMulQuantized, +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized, ::testing::Combine( ::testing::ValuesIn(STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), ::testing::ValuesIn(quantized_precisions()), + ::testing::Values(MatMulType::MatMul), ::testing::Values(3), // Subgraph + Reshape + Subgraph ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ] ::testing::Values(ov::test::utils::DEVICE_CPU)), MatMul::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulQuantizedSoftmax, MatMulQuantizedSoftmax, +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantizedSoftmax, 
MatMulsQuantizedSoftmax, ::testing::Combine( ::testing::ValuesIn(STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), ::testing::ValuesIn(quantized_precisions()), + ::testing::Values(MatMulType::MatMul), ::testing::Values(3), // Subgraph + Reshape + Subgraph ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ] ::testing::Values(ov::test::utils::DEVICE_CPU)), MatMul::getTestCaseName); - } // namespace } // namespace snippets } // namespace test diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp index 949cbe37bbadd4..c05087283305e4 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp @@ -41,6 +41,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul, ::testing::ValuesIn(transpose_input_shapes), ::testing::Values(0), // Transpose on 0th Matmul input ::testing::ValuesIn(precisions(false)), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // MatMul ::testing::Values(1), // Tokenized MatMul + FusedTranspose ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -57,22 +58,29 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMatMult, TransposeMatMul, ::testing::ValuesIn(transpose_input_shapes_dynamic), ::testing::Values(0), // Transpose on 0th Matmul input ::testing::ValuesIn(precisions(true)), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // MatMul ::testing::Values(1), // Tokenized MatMul + FusedTranspose ::testing::Values(ov::test::utils::DEVICE_CPU)), TransposeMatMul::getTestCaseName); -// TODO: FuseTransposeToBrgemm supports fusing only if Transpose is before Parameter in cases when Transpose is on input at the moment -// When we support the branch Parameter->FQ->Transpose->MatMul[0th input], uncomment this test case please -// INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMatMulFQ, TransposeMatMulFQ, -// ::testing::Combine( -// ::testing::ValuesIn(transpose_input_shapes), -// ::testing::Values(0), // Transpose on 0th Matmul input -// ::testing::Values(ov::element::i8), -// ::testing::Values(1), // MatMul -// ::testing::Values(1), // Tokenized MatMul + FusedTranspose -// ::testing::Values(ov::test::utils::DEVICE_CPU)), -// TransposeMatMulFQ::getTestCaseName); +std::vector> fc_transpose_input_shapes{ + { + {PartialShape{-1, -1, -1, 2500}, {{1, 49, 2, 2500}, {1, 70, 2, 2500}, {1, 49, 2, 2500}}}, + {{}, {{2500, 256}}} + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnected, TransposeMatMul, + ::testing::Combine( + ::testing::ValuesIn(fc_transpose_input_shapes), + ::testing::Values(0), // Transpose on 0th Matmul input + ::testing::ValuesIn(precisions(true)), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // Fused MatMul + Transpose + ::testing::Values(1), // Tokenized MatMul + FusedTranspose + ::testing::Values(ov::test::utils::DEVICE_CPU)), + TransposeMatMul::getTestCaseName); } // namespace transpose_zero_input namespace transpose_first_input { @@ -82,6 +90,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul, ::testing::ValuesIn(transpose_input_shapes), ::testing::Values(1), // Transpose on 1st Matmul input ::testing::ValuesIn(precisions(false)), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // MatMul ::testing::Values(1), // Tokenized MatMul + FusedTranspose 
::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -98,6 +107,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMatMult, TransposeMatMul, ::testing::ValuesIn(transpose_input_shapes_dynamic), ::testing::Values(1), // Transpose on 1st Matmul input ::testing::ValuesIn(precisions(true)), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // MatMul ::testing::Values(1), // Tokenized MatMul + FusedTranspose ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -108,6 +118,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMatMulFQ, TransposeMatMulFQ, ::testing::ValuesIn(transpose_input_shapes), ::testing::Values(1), // Transpose on 1st Matmul input ::testing::ValuesIn(precisions()), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // MatMul ::testing::Values(1), // Tokenized MatMul + FusedTranspose ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -122,6 +133,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul, ::testing::ValuesIn(transpose_input_shapes), ::testing::Values(2), // Transpose on Matmul output ::testing::ValuesIn(precisions()), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // MatMul ::testing::Values(1), // Tokenized MatMul + FusedTranspose ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -138,22 +150,29 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMatMult, TransposeMatMul, ::testing::ValuesIn(transpose_input_shapes_dynamic), ::testing::Values(2), // Transpose on Matmul output ::testing::ValuesIn(precisions(true)), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // MatMul ::testing::Values(1), // Tokenized MatMul + FusedTranspose ::testing::Values(ov::test::utils::DEVICE_CPU)), TransposeMatMul::getTestCaseName); -// TODO: At the moment we doesn't support the branch MatMul[output]->Transpose->FQ. 
-// When we add support, uncomment this test case please -// INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMatMulFQ, TransposeMatMulFQ, -// ::testing::Combine( -// ::testing::ValuesIn(transpose_input_shapes), -// ::testing::Values(2), // Transpose on Matmul output -// ::testing::Values(ov::element::i8), -// ::testing::Values(1), // MatMul -// ::testing::Values(1), // Tokenized MatMul + FusedTranspose -// ::testing::Values(ov::test::utils::DEVICE_CPU)), -// TransposeMatMulFQ::getTestCaseName); +std::vector> fc_transpose_input_shapes{ + { + {PartialShape{-1, -1, -1, 2500}, {{2, 1, 49, 2500}, {1, 2, 70, 2500}, {2, 1, 49, 2500}}}, + {{}, {{2500, 256}}} + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnected, TransposeMatMul, + ::testing::Combine( + ::testing::ValuesIn(fc_transpose_input_shapes), + ::testing::Values(2), // Transpose on Matmul output + ::testing::ValuesIn(precisions(true)), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // MatMul + ::testing::Values(1), // Tokenized MatMul + FusedTranspose + ::testing::Values(ov::test::utils::DEVICE_CPU)), + TransposeMatMul::getTestCaseName); } // namespace transpose_output namespace explicit_transpose { @@ -179,6 +198,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ExplicitTransposeMatMul, ExplicitTranspo ::testing::ValuesIn(STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}})), ::testing::Values(1), // Transpose on second input ::testing::ValuesIn(precisions()), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // Subgraph; ::testing::Values(1), // Tokenized MatMul+Bias ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -195,6 +215,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynExplicitTransposeMatMul, ExplicitTran ::testing::ValuesIn(explicit_input_shapes_dynamic), ::testing::Values(1), // Transpose on second input ::testing::ValuesIn(precisions(true)), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // Subgraph; ::testing::Values(1), // Tokenized MatMul+Bias ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -205,6 +226,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMatMulBias, ExplicitTransposeMa ::testing::ValuesIn(STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}, {1, 1, 69, 49}})), ::testing::Values(1), // Transpose on second input ::testing::ValuesIn(precisions()), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // Subgraph; ::testing::Values(1), // Tokenized MatMul+Bias ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -222,6 +244,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynTransposeMatMulBias, ExplicitTranspos ::testing::ValuesIn(explicit_bias_input_shapes_dynamic), ::testing::Values(1), // Transpose on second input ::testing::ValuesIn(precisions(true)), + ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // Subgraph; ::testing::Values(1), // Tokenized MatMul+Bias ::testing::Values(ov::test::utils::DEVICE_CPU)), diff --git a/src/tests/functional/plugin/shared/include/snippets/add.hpp b/src/tests/functional/plugin/shared/include/snippets/add.hpp index 399c6c3bc8b7aa..b51df62dd4da72 100644 --- a/src/tests/functional/plugin/shared/include/snippets/add.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/add.hpp @@ -37,7 +37,7 @@ typedef std::tuple< > AddParamsPair; class Add : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); @@ -46,7 +46,7 @@ class Add : public 
testing::WithParamInterface, }; class AddConst : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); protected: @@ -59,7 +59,7 @@ class AddRollConst : public AddConst { }; class AddPair : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); protected: diff --git a/src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp b/src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp index 20009c401147e2..282c1241555cc9 100644 --- a/src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp @@ -24,8 +24,7 @@ typedef std::tuple < std::string // target device > CheckBroadcastParams; -class CheckBroadcast : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { +class CheckBroadcast : public testing::WithParamInterface, virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/codegen_gelu.hpp b/src/tests/functional/plugin/shared/include/snippets/codegen_gelu.hpp index 1ef03e212a5378..9085ab23801a34 100644 --- a/src/tests/functional/plugin/shared/include/snippets/codegen_gelu.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/codegen_gelu.hpp @@ -19,7 +19,7 @@ typedef std::tuple< > CodegenGeluParams; class CodegenGelu : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/conv_eltwise.hpp b/src/tests/functional/plugin/shared/include/snippets/conv_eltwise.hpp index ef5b315c3f67e2..a2db79e13fcbe2 100644 --- a/src/tests/functional/plugin/shared/include/snippets/conv_eltwise.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/conv_eltwise.hpp @@ -20,7 +20,7 @@ typedef std::tuple< > ConvEltwiseParams; class ConvEltwise : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/convert.hpp b/src/tests/functional/plugin/shared/include/snippets/convert.hpp index 8e3871a77ea0ad..6de675c0b70da2 100644 --- a/src/tests/functional/plugin/shared/include/snippets/convert.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/convert.hpp @@ -21,7 +21,7 @@ typedef std::tuple< using parameters = std::vector>; class Convert : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/edge_replace.hpp b/src/tests/functional/plugin/shared/include/snippets/edge_replace.hpp index 698f69043f9590..6c89453c0b0990 100644 --- a/src/tests/functional/plugin/shared/include/snippets/edge_replace.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/edge_replace.hpp @@ -19,7 +19,7 @@ typedef std::tuple< > EdgeReplaceParams; class EdgeReplace : public 
testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/eltwise_two_results.hpp b/src/tests/functional/plugin/shared/include/snippets/eltwise_two_results.hpp index 17a598a3ee9ac5..b28bd300b9908c 100644 --- a/src/tests/functional/plugin/shared/include/snippets/eltwise_two_results.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/eltwise_two_results.hpp @@ -19,7 +19,7 @@ typedef std::tuple< > EltwiseTwoResultsParams; class EltwiseTwoResults : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); @@ -27,7 +27,6 @@ class EltwiseTwoResults : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/exp.hpp b/src/tests/functional/plugin/shared/include/snippets/exp.hpp index 477377c2154443..0f02877ffeaa90 100644 --- a/src/tests/functional/plugin/shared/include/snippets/exp.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/exp.hpp @@ -19,7 +19,7 @@ typedef std::tuple< > ExpParams; class Exp : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/fake_quantize_decomposition_test.hpp b/src/tests/functional/plugin/shared/include/snippets/fake_quantize_decomposition_test.hpp index 26b817f0c0b50c..6ddf9873b74ebe 100644 --- a/src/tests/functional/plugin/shared/include/snippets/fake_quantize_decomposition_test.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/fake_quantize_decomposition_test.hpp @@ -37,7 +37,7 @@ typedef std::tuple< std::string // target device > testsParams; -class FakeQuantizeDecompositionTest : public testing::WithParamInterface, virtual public ov::test::SnippetsTestsCommon { +class FakeQuantizeDecompositionTest : public testing::WithParamInterface, virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/group_normalization.hpp b/src/tests/functional/plugin/shared/include/snippets/group_normalization.hpp index 7f37032c8303f5..5faa6365c082e8 100644 --- a/src/tests/functional/plugin/shared/include/snippets/group_normalization.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/group_normalization.hpp @@ -20,7 +20,7 @@ typedef std::tuple< > GroupNormalizationParams; class GroupNormalization : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/matmul.hpp b/src/tests/functional/plugin/shared/include/snippets/matmul.hpp index 82bd0a0d42ce2f..c592b3900d1b84 100644 --- a/src/tests/functional/plugin/shared/include/snippets/matmul.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/matmul.hpp @@ -5,6 +5,7 @@ #pragma once #include "shared_test_classes/base/snippets_test_utils.hpp" 
+#include "subgraph_matmul.hpp" namespace ov { namespace test { @@ -13,50 +14,71 @@ namespace snippets { typedef std::tuple< std::vector, // Input Shapes std::vector,// Input Element types + MatMulType, size_t, // Expected num nodes size_t, // Expected num subgraphs std::string // Target Device > MatMulParams; +class MatMulBase : public SnippetsTestsCommon { +protected: + /** + * @brief Erases shapes with the given indices from inputDynamicShapes and targetStaticShapes + */ + void filter_shape_info(const std::set& idces_to_remove); + virtual std::shared_ptr get_builder(const std::vector& types) = 0; + + MatMulType matmul_type; +}; + class MatMul : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public MatMulBase { public: static std::string getTestCaseName(testing::TestParamInfo obj); protected: void SetUp() override; - - virtual void init_subgraph(const std::vector& types); + std::shared_ptr get_builder(const std::vector& types) override; }; class MatMulTransposeB : public MatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; class MatMulFQ : public MatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; class MatMulBias : public MatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; class MatMulBiasQuantized : public MatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; +}; + +class MatMulsQuantized : public MatMul { +protected: + std::shared_ptr get_builder(const std::vector& types) override; +}; + +class MatMulsQuantizedSoftmax : public MatMul { +protected: + std::shared_ptr get_builder(const std::vector& types) override; }; -class MatMulQuantized : public MatMul { +class MatMulEltwiseChain : public MatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; -class MatMulQuantizedSoftmax : public MatMul { +class MatMulEltwiseChainCascade : public MatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; } // namespace snippets diff --git a/src/tests/functional/plugin/shared/include/snippets/max_num_params_eltwise.hpp b/src/tests/functional/plugin/shared/include/snippets/max_num_params_eltwise.hpp index beaa33127d955b..fc02553a0b1bd5 100644 --- a/src/tests/functional/plugin/shared/include/snippets/max_num_params_eltwise.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/max_num_params_eltwise.hpp @@ -18,7 +18,7 @@ typedef std::tuple< > MaxNumParamsEltwiseParams; class MaxNumParamsEltwise : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/mha.hpp b/src/tests/functional/plugin/shared/include/snippets/mha.hpp index 0a1733a6099242..f8198dee0218ee 100644 --- a/src/tests/functional/plugin/shared/include/snippets/mha.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/mha.hpp @@ -34,7 +34,7 @@ typedef std::tuple, // Input shapes > MHAWithDynamicMulParams; -class MHABase : virtual public 
ov::test::SnippetsTestsCommon { +class MHABase : virtual public SnippetsTestsCommon { public: constexpr static size_t default_thread_count = 0; diff --git a/src/tests/functional/plugin/shared/include/snippets/precision_propagation_convertion.hpp b/src/tests/functional/plugin/shared/include/snippets/precision_propagation_convertion.hpp index 759b89fbd46d00..4a7de880d3b97b 100644 --- a/src/tests/functional/plugin/shared/include/snippets/precision_propagation_convertion.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/precision_propagation_convertion.hpp @@ -20,7 +20,7 @@ typedef std::tuple< class PrecisionPropagationConvertion : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/reduce.hpp b/src/tests/functional/plugin/shared/include/snippets/reduce.hpp index 56def308bde5b0..31e95962b687be 100644 --- a/src/tests/functional/plugin/shared/include/snippets/reduce.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/reduce.hpp @@ -21,7 +21,7 @@ typedef std::tuple ReduceParams; class Reduce : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/select.hpp b/src/tests/functional/plugin/shared/include/snippets/select.hpp index 35efecdf90abed..6ccac48dd48aff 100644 --- a/src/tests/functional/plugin/shared/include/snippets/select.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/select.hpp @@ -32,7 +32,7 @@ typedef std::tuple< > BroadcastSelectParams; class Select : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); @@ -43,7 +43,7 @@ class Select : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/softmax.hpp b/src/tests/functional/plugin/shared/include/snippets/softmax.hpp index 79a7ddcffdecbf..d69bd8e648a710 100644 --- a/src/tests/functional/plugin/shared/include/snippets/softmax.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/softmax.hpp @@ -27,7 +27,7 @@ typedef std::tuple< > AddSoftmaxParams; class Softmax : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); @@ -36,7 +36,7 @@ class Softmax : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/three_inputs_eltwise.hpp b/src/tests/functional/plugin/shared/include/snippets/three_inputs_eltwise.hpp index 68aff359bac5fd..a14162460d4f39 100644 --- a/src/tests/functional/plugin/shared/include/snippets/three_inputs_eltwise.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/three_inputs_eltwise.hpp @@ -20,7 +20,7 @@ typedef std::tuple< > ThreeInputsEltwiseParams; class ThreeInputsEltwise : 
public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/transpose.hpp b/src/tests/functional/plugin/shared/include/snippets/transpose.hpp index beec2c3750cbb1..f0f1f254ee37d0 100644 --- a/src/tests/functional/plugin/shared/include/snippets/transpose.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/transpose.hpp @@ -27,7 +27,7 @@ typedef std::tuple< > TransposeMulParams; class Transpose : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); @@ -36,7 +36,7 @@ class Transpose : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/transpose_matmul.hpp b/src/tests/functional/plugin/shared/include/snippets/transpose_matmul.hpp index ccd6016178d71a..07960cb422d5e4 100644 --- a/src/tests/functional/plugin/shared/include/snippets/transpose_matmul.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/transpose_matmul.hpp @@ -5,6 +5,7 @@ #pragma once #include "shared_test_classes/base/snippets_test_utils.hpp" +#include "snippets/matmul.hpp" namespace ov { namespace test { @@ -14,33 +15,37 @@ typedef std::tuple< std::vector, // Input Shapes size_t , // Transpose position std::vector,// Input Element types + MatMulType, size_t, // Expected num nodes size_t, // Expected num subgraphs std::string // Target Device > TransposeMatMulParams; class TransposeMatMul : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public MatMulBase { public: static std::string getTestCaseName(testing::TestParamInfo obj); protected: void SetUp() override; + std::shared_ptr get_builder(const std::vector& types) override; + + size_t transpose_position; }; class TransposeMatMulFQ : public TransposeMatMul { protected: - void SetUp() override; + std::shared_ptr get_builder(const std::vector& types) override; }; class ExplicitTransposeMatMul : public TransposeMatMul { protected: - void SetUp() override; + std::shared_ptr get_builder(const std::vector& types) override; }; class ExplicitTransposeMatMulBias : public TransposeMatMul { protected: - void SetUp() override; + std::shared_ptr get_builder(const std::vector& types) override; }; } // namespace snippets diff --git a/src/tests/functional/plugin/shared/include/snippets/transpose_softmax.hpp b/src/tests/functional/plugin/shared/include/snippets/transpose_softmax.hpp index 368b5826b34c45..4c1ac6443db3fb 100644 --- a/src/tests/functional/plugin/shared/include/snippets/transpose_softmax.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/transpose_softmax.hpp @@ -21,7 +21,7 @@ typedef std::tuple< class TransposeSoftmax : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/two_inputs_and_outputs.hpp b/src/tests/functional/plugin/shared/include/snippets/two_inputs_and_outputs.hpp index 00d0a78f5276a7..b39b48e2819a92 100644 --- 
a/src/tests/functional/plugin/shared/include/snippets/two_inputs_and_outputs.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/two_inputs_and_outputs.hpp @@ -18,7 +18,7 @@ typedef std::tuple< > TwoInputsAndOutputsParams; class TwoInputsAndOutputs : public testing::WithParamInterface, - virtual public ov::test::SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); @@ -26,7 +26,6 @@ class TwoInputsAndOutputs : public testing::WithParamInterface& idces_to_remove) { + for (auto idx_it = idces_to_remove.rbegin(); idx_it != idces_to_remove.rend(); ++idx_it) { + const auto& idx = * idx_it; + OPENVINO_ASSERT(idx < inputDynamicShapes.size()); + inputDynamicShapes.erase(inputDynamicShapes.begin() + idx); + for (auto& target_shapes : targetStaticShapes) { + OPENVINO_ASSERT(idx < target_shapes.size()); + target_shapes.erase(target_shapes.begin() + idx); + } + } +} + std::string MatMul::getTestCaseName(testing::TestParamInfo obj) { std::vector input_shapes; std::vector elem_types; + MatMulType mm_type; std::string targetDevice; size_t num_nodes, num_subgraphs; - std::tie(input_shapes, elem_types, num_nodes, num_subgraphs, targetDevice) = obj.param; + std::tie(input_shapes, elem_types, mm_type, num_nodes, num_subgraphs, targetDevice) = obj.param; std::ostringstream result; for (size_t i = 0; i < input_shapes.size(); i++) result << "IS[" << i << "]=" << input_shapes[i] << "_"; for (size_t i = 0; i < elem_types.size(); i++) result << "T[" << i <<"]=" << elem_types[i] << "_"; + + result << mm_type << "_"; result << "#N=" << num_nodes << "_"; result << "#S=" << num_subgraphs << "_"; result << "targetDevice=" << targetDevice; @@ -32,48 +48,51 @@ std::string MatMul::getTestCaseName(testing::TestParamInfo input_shapes; std::vector elem_types; - std::tie(input_shapes, elem_types, ref_num_nodes, ref_num_subgraphs, targetDevice) = this->GetParam(); + std::tie(input_shapes, elem_types, matmul_type, ref_num_nodes, ref_num_subgraphs, targetDevice) = this->GetParam(); init_input_shapes(input_shapes); - init_subgraph(elem_types); + const auto builder = get_builder(elem_types); + function = builder->getOriginal(); + filter_shape_info(builder->get_constant_input_idces()); if (!configuration.count("SNIPPETS_MODE")) { configuration.insert({"SNIPPETS_MODE", "IGNORE_CALLBACK"}); } } -void MatMul::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::MatMulFunction(inputDynamicShapes, types); - function = f.getOriginal(); +std::shared_ptr MatMul::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type); } -void MatMulTransposeB::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::MatMulFunction(inputDynamicShapes, types, true); - function = f.getOriginal(); +std::shared_ptr MatMulTransposeB::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type, true); } -void MatMulFQ::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::FQMatMulFunction(inputDynamicShapes); - function = f.getOriginal(); +std::shared_ptr MatMulFQ::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, matmul_type); } -void MatMulBias::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::MatMulBiasFunction(inputDynamicShapes, types); - function = f.getOriginal(); +std::shared_ptr MatMulBias::get_builder(const std::vector& types) { + return 
std::make_shared(inputDynamicShapes, types, matmul_type); } -void MatMulBiasQuantized::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::MatMulBiasQuantizedFunction(inputDynamicShapes, types); - function = f.getOriginal(); +std::shared_ptr MatMulBiasQuantized::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type); } -void MatMulQuantized::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::MatMulsQuantizedFunction(inputDynamicShapes, types); - function = f.getOriginal(); +std::shared_ptr MatMulsQuantized::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type); } -void MatMulQuantizedSoftmax::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::MatMulsQuantizedSoftmaxFunction(inputDynamicShapes, types); - function = f.getOriginal(); +std::shared_ptr MatMulsQuantizedSoftmax::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type); +} + +std::shared_ptr MatMulEltwiseChain::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type); +} + +std::shared_ptr MatMulEltwiseChainCascade::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type); } TEST_P(MatMul, CompareWithRefImpl) { @@ -107,19 +126,31 @@ TEST_P(MatMulBiasQuantized, CompareWithRefImpl) { validateNumSubgraphs(); } -TEST_P(MatMulQuantized, CompareWithRefImpl) { +TEST_P(MatMulsQuantized, CompareWithRefImpl) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); validateNumSubgraphs(); } -TEST_P(MatMulQuantizedSoftmax, CompareWithRefImpl) { +TEST_P(MatMulsQuantizedSoftmax, CompareWithRefImpl) { SKIP_IF_CURRENT_TEST_IS_DISABLED() abs_threshold = 4e-6; run(); validateNumSubgraphs(); } +TEST_P(MatMulEltwiseChain, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + run(); + validateNumSubgraphs(); +} + +TEST_P(MatMulEltwiseChainCascade, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + run(); + validateNumSubgraphs(); +} + } // namespace snippets } // namespace test } // namespace ov diff --git a/src/tests/functional/plugin/shared/src/snippets/transpose_matmul.cpp b/src/tests/functional/plugin/shared/src/snippets/transpose_matmul.cpp index 7e81dd97e4360b..d0a594a881c6ed 100644 --- a/src/tests/functional/plugin/shared/src/snippets/transpose_matmul.cpp +++ b/src/tests/functional/plugin/shared/src/snippets/transpose_matmul.cpp @@ -2,10 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "common_test_utils/common_utils.hpp" #include "snippets/transpose_matmul.hpp" -#include "subgraph_matmul.hpp" + +#include "common_test_utils/common_utils.hpp" #include "functional_test_utils/skip_tests_config.hpp" +#include "snippets/matmul.hpp" +#include "subgraph_matmul.hpp" namespace ov { namespace test { @@ -15,9 +17,10 @@ std::string TransposeMatMul::getTestCaseName(testing::TestParamInfo input_shapes; size_t transpose_position; std::vector elem_types; + MatMulType matmul_type; std::string targetDevice; size_t num_nodes, num_subgraphs; - std::tie(input_shapes, transpose_position, elem_types, num_nodes, num_subgraphs, targetDevice) = obj.param; + std::tie(input_shapes, transpose_position, elem_types, matmul_type, num_nodes, num_subgraphs, targetDevice) = obj.param; std::ostringstream result; for (size_t i = 0; i < input_shapes.size(); ++i) { result << "IS[" << i << "]=" << input_shapes[i] << "_"; @@ -25,6 +28,7 @@ std::string 
TransposeMatMul::getTestCaseName(testing::TestParamInfo input_shapes; - size_t transpose_position; - std::vector elem_types; - std::tie(input_shapes, transpose_position, elem_types, ref_num_nodes, ref_num_subgraphs, targetDevice) = this->GetParam(); - init_input_shapes(input_shapes); - - auto f = ov::test::snippets::FQMatMulFunction(inputDynamicShapes, transpose_position); - function = f.getOriginal(); - if (!configuration.count("SNIPPETS_MODE")) { - configuration.insert({"SNIPPETS_MODE", "IGNORE_CALLBACK"}); - } - abs_threshold = 5e-6; +std::shared_ptr TransposeMatMul::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type, transpose_position); } -void ExplicitTransposeMatMul::SetUp() { - std::vector input_shapes; - size_t transpose_position; - std::vector elem_types; - std::tie(input_shapes, transpose_position, elem_types, ref_num_nodes, ref_num_subgraphs, targetDevice) = this->GetParam(); - init_input_shapes(input_shapes); - - auto f = ov::test::snippets::TransposeMatMulFunction(inputDynamicShapes); - function = f.getOriginal(); - if (!configuration.count("SNIPPETS_MODE")) { - configuration.insert({"SNIPPETS_MODE", "IGNORE_CALLBACK"}); - } +std::shared_ptr TransposeMatMulFQ::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, matmul_type, transpose_position); } -void ExplicitTransposeMatMulBias::SetUp() { - std::vector input_shapes; - size_t transpose_position; - std::vector elem_types; - std::tie(input_shapes, transpose_position, elem_types, ref_num_nodes, ref_num_subgraphs, targetDevice) = this->GetParam(); - init_input_shapes(input_shapes); +std::shared_ptr ExplicitTransposeMatMul::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes); +} - auto f = ov::test::snippets::TransposeMatMulBiasFunction(inputDynamicShapes); - function = f.getOriginal(); - if (!configuration.count("SNIPPETS_MODE")) { - configuration.insert({"SNIPPETS_MODE", "IGNORE_CALLBACK"}); - } +std::shared_ptr ExplicitTransposeMatMulBias::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes); } TEST_P(TransposeMatMul, CompareWithRefImpl) { @@ -96,6 +73,7 @@ TEST_P(TransposeMatMul, CompareWithRefImpl) { TEST_P(TransposeMatMulFQ, CompareWithRefImpl) { SKIP_IF_CURRENT_TEST_IS_DISABLED() + abs_threshold = 5e-6; run(); validateNumSubgraphs(); } diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/snippets_test_utils.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/snippets_test_utils.hpp index 35f17e42b2ae78..c0652aa257dbd8 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/snippets_test_utils.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/snippets_test_utils.hpp @@ -9,6 +9,8 @@ namespace ov { namespace test { +namespace snippets { +using ov::test::operator<<; class SnippetsTestsCommon : virtual public ov::test::SubgraphBaseTest { protected: void validateNumSubgraphs(); @@ -22,5 +24,6 @@ class SnippetsTestsCommon : virtual public ov::test::SubgraphBaseTest { size_t ref_num_nodes = 0; size_t ref_num_subgraphs = 0; }; +} // namespace snippets } // namespace test } // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/base/snippets_test_utils.cpp b/src/tests/functional/shared_test_classes/src/base/snippets_test_utils.cpp index 433da771cb2b6d..84b075906b5d48 100644 --- 
a/src/tests/functional/shared_test_classes/src/base/snippets_test_utils.cpp +++ b/src/tests/functional/shared_test_classes/src/base/snippets_test_utils.cpp @@ -9,6 +9,7 @@ namespace ov { namespace test { +namespace snippets { void SnippetsTestsCommon::validateNumSubgraphs() { bool isCurrentTestDisabled = ov::test::utils::current_test_is_disabled(); if (isCurrentTestDisabled) @@ -59,5 +60,6 @@ void SnippetsTestsCommon::setInferenceType(ov::element::Type type) { configuration.emplace(ov::hint::inference_precision(type)); } +} // namespace snippets } // namespace test } // namespace ov diff --git a/src/tests/ov_helpers/ov_snippets_models/include/snippets_helpers.hpp b/src/tests/ov_helpers/ov_snippets_models/include/snippets_helpers.hpp index e9ce6d9281d77d..070b079702031f 100644 --- a/src/tests/ov_helpers/ov_snippets_models/include/snippets_helpers.hpp +++ b/src/tests/ov_helpers/ov_snippets_models/include/snippets_helpers.hpp @@ -57,6 +57,7 @@ class SnippetsFunctionBase { const std::vector input_shapes; virtual void validate_function(const std::shared_ptr &f) const; + static void validate_params_shape(const std::vector& input_shapes, const ov::ParameterVector& params); }; /// \brief Base class for snippets subgraphs with customizable embedded op sequences. Note that the custom_ops allowed types are diff --git a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_lowered.hpp b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_lowered.hpp index e977ae6d8d1709..208dd7f565c9c1 100644 --- a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_lowered.hpp +++ b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_lowered.hpp @@ -55,7 +55,7 @@ class EltwiseThreeInputsLoweredFunction : public EltwiseThreeInputsFunction { class Transpose0213MatMulLoweredFunction : public Transpose0213MatMulFunction { public: explicit Transpose0213MatMulLoweredFunction(const std::vector& inputShapes, size_t position = 0) : - Transpose0213MatMulFunction(inputShapes, std::vector{ov::element::f32, ov::element::f32}, position) { + Transpose0213MatMulFunction(inputShapes, std::vector{ov::element::f32, ov::element::f32}, MatMulType::MatMul, position) { } protected: std::shared_ptr initLowered() const override; diff --git a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_matmul.hpp b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_matmul.hpp index 980f5527569182..69af2616b747de 100644 --- a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_matmul.hpp +++ b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_matmul.hpp @@ -15,75 +15,90 @@ namespace ov { namespace test { namespace snippets { +enum class MatMulType { MatMul, FullyConnected }; +std::ostream &operator<<(std::ostream& os, MatMulType type); + +class MatMulFunctionBase : public SnippetsFunctionBase { +public: + explicit MatMulFunctionBase(const std::vector& inputShapes, + MatMulType type, + const std::vector& precisions = {}); + + virtual std::set get_constant_input_idces() const { + return matmul_type == MatMulType::FullyConnected ? 
std::set{1} : std::set{}; + } + +protected: + void validate_function(const std::shared_ptr &f) const override; + + std::vector precisions; + MatMulType matmul_type; +}; + /// Minimal graph to test MatMul support /// Tokenized simply by starting subgraph, // in1 in2 // Matmul // Result -class MatMulFunction : public SnippetsFunctionBase { +class MatMulFunction : public MatMulFunctionBase { public: - explicit MatMulFunction(const std::vector& inputShapes, const std::vector& precisions, bool transpose_b = false) - : SnippetsFunctionBase(inputShapes), precisions(precisions), transpose_b(transpose_b) { - OPENVINO_ASSERT(input_shapes.size() == 2, "Got invalid number of input shapes"); - validate_precisions(precisions); - } - static void validate_precisions(const std::vector& precisions) { - OPENVINO_ASSERT(precisions.size() == 2, "Got invalid number of input element types"); - const bool is_f32 = ov::snippets::utils::everyone_is(element::f32, precisions[0], precisions[1]); - const bool is_int8 = ov::snippets::utils::one_of(precisions[0], element::i8, element::u8) && precisions[1] == element::i8; - const bool is_bf16 = ov::snippets::utils::everyone_is(element::bf16, precisions[0], precisions[1]); - OPENVINO_ASSERT(is_f32 || is_bf16 || is_int8, "Invalid precisions"); - } + explicit MatMulFunction(const std::vector& inputShapes, + const std::vector& precisions, + MatMulType type, + bool transpose_b = false) + : MatMulFunctionBase(inputShapes, type, precisions), transpose_b(transpose_b) {} + protected: std::shared_ptr initOriginal() const override; std::shared_ptr initReference() const override; - std::vector precisions; bool transpose_b; }; -class FQMatMulFunction : public SnippetsFunctionBase { +class FQMatMulFunction : public MatMulFunctionBase { public: - explicit FQMatMulFunction(const std::vector& inputShapes, int pos = -1) : SnippetsFunctionBase({inputShapes[0]}), pos(pos) { + explicit FQMatMulFunction(const std::vector& inputShapes, MatMulType type, int pos = -1) + : MatMulFunctionBase(inputShapes, type), pos(pos) { OPENVINO_ASSERT(inputShapes.size() == 2, "Got invalid number of input shapes"); OPENVINO_ASSERT(pos >=-1 && pos <= 2, "Got invalid transpose position"); - const_shape = inputShapes[1]; + if (type == MatMulType::FullyConnected) + OPENVINO_ASSERT(pos != 1, "transpose on B input is not supported for FullyConnected matmul type"); } + protected: std::shared_ptr initOriginal() const override; - ov::PartialShape const_shape; int pos = -1; }; // As same as MatMulFunction but with biases -class MatMulBiasFunction : public SnippetsFunctionBase { +class MatMulBiasFunction : public MatMulFunctionBase { public: - explicit MatMulBiasFunction(const std::vector& inputShapes, const std::vector& precisions) - : SnippetsFunctionBase(inputShapes), precisions(precisions) { + explicit MatMulBiasFunction(const std::vector& inputShapes, + const std::vector& precisions, + MatMulType type) + : MatMulFunctionBase(inputShapes, type, precisions) { OPENVINO_ASSERT(input_shapes.size() == 3, "Got invalid number of input shapes"); - MatMulFunction::validate_precisions(precisions); } + protected: std::shared_ptr initOriginal() const override; - - std::vector precisions; }; // Quantized MatMul // FQ[I8] // Add -class MatMulBiasQuantizedFunction : public SnippetsFunctionBase { +class MatMulBiasQuantizedFunction : public MatMulFunctionBase { public: - explicit MatMulBiasQuantizedFunction(const std::vector& inputShapes, const std::vector& precisions) - : SnippetsFunctionBase(inputShapes), precisions(precisions) { + 
explicit MatMulBiasQuantizedFunction(const std::vector& inputShapes, + const std::vector& precisions, + MatMulType type) + : MatMulFunctionBase(inputShapes, type, precisions) { OPENVINO_ASSERT(input_shapes.size() == 3, "Got invalid number of input shapes"); - MatMulFunction::validate_precisions(precisions); } + protected: std::shared_ptr initOriginal() const override; - - std::vector precisions; }; // Quantized MatMul FQ[I8] @@ -91,17 +106,21 @@ class MatMulBiasQuantizedFunction : public SnippetsFunctionBase { // \ / // MatMul // FQ[I8] -class MatMulsQuantizedFunction : public SnippetsFunctionBase { +class MatMulsQuantizedFunction : public MatMulFunctionBase { public: - explicit MatMulsQuantizedFunction(const std::vector& inputShapes, const std::vector& precisions) - : SnippetsFunctionBase(inputShapes), precisions(precisions) { + explicit MatMulsQuantizedFunction(const std::vector& inputShapes, + const std::vector& precisions, + MatMulType type) + : MatMulFunctionBase(inputShapes, type, precisions) { OPENVINO_ASSERT(input_shapes.size() == 3, "Got invalid number of input shapes"); - MatMulFunction::validate_precisions(precisions); } + + std::set get_constant_input_idces() const override { + return matmul_type == MatMulType::FullyConnected ? std::set{1, 2} : std::set{}; + } + protected: std::shared_ptr initOriginal() const override; - - std::vector precisions; }; /// Minimal graph to test MatMul+Transpose combinations. Transpose location is specified via the position argument: @@ -111,44 +130,45 @@ class MatMulsQuantizedFunction : public SnippetsFunctionBase { // Transpose / // Matmul // Result -class Transpose0213MatMulFunction : public SnippetsFunctionBase { +class Transpose0213MatMulFunction : public MatMulFunctionBase { public: explicit Transpose0213MatMulFunction(const std::vector& inputShapes, const std::vector& precisions, - size_t position = 0) - : SnippetsFunctionBase(inputShapes), transpose_position(position), precisions(precisions) { + MatMulType type, size_t position = 0) + : MatMulFunctionBase(inputShapes, type, precisions), transpose_position(position) { OPENVINO_ASSERT(input_shapes.size() == 2, "Got invalid number of input shapes"); - OPENVINO_ASSERT(input_shapes[0].rank().get_length() == 4 && input_shapes[1].rank().get_length() == 4, - "Only rank 4 input shapes are supported by this test"); + OPENVINO_ASSERT(input_shapes[0].size() == 4, "Only rank 4 input shapes are supported by this test"); + if (position == 1) { + OPENVINO_ASSERT(input_shapes[1].size() == 4, "Only rank 4 input shapes are supported by this test"); + OPENVINO_ASSERT(type == MatMulType::MatMul, "Transpose on B input is not supported for FullyConnected type"); + } OPENVINO_ASSERT(transpose_position >=0 && transpose_position <= 2, "Got invalid transpose position"); - MatMulFunction::validate_precisions(precisions); } protected: std::shared_ptr initOriginal() const override; size_t transpose_position; - std::vector precisions; }; -class TransposeMatMulFunction : public SnippetsFunctionBase { +class TransposeMatMulFunction : public MatMulFunctionBase { public: - explicit TransposeMatMulFunction(const std::vector& inputShapes) : SnippetsFunctionBase(inputShapes) { + explicit TransposeMatMulFunction(const std::vector& inputShapes) : MatMulFunctionBase(inputShapes, MatMulType::MatMul) { OPENVINO_ASSERT(input_shapes.size() == 2, "Got invalid number of input shapes"); } protected: std::shared_ptr initOriginal() const override; }; -class TransposeMatMulBiasFunction : public SnippetsFunctionBase { +class 
TransposeMatMulBiasFunction : public MatMulFunctionBase { public: - explicit TransposeMatMulBiasFunction(const std::vector& inputShapes) : SnippetsFunctionBase(inputShapes) { + explicit TransposeMatMulBiasFunction(const std::vector& inputShapes) : MatMulFunctionBase(inputShapes, MatMulType::MatMul) { OPENVINO_ASSERT(input_shapes.size() == 3, "Got invalid number of input shapes"); } protected: std::shared_ptr initOriginal() const override; }; -class TransposeMulMatMulBiasFunction : public SnippetsFunctionBase { +class TransposeMulMatMulBiasFunction : public MatMulFunctionBase { public: - explicit TransposeMulMatMulBiasFunction(const std::vector& inputShapes) : SnippetsFunctionBase(inputShapes) { + explicit TransposeMulMatMulBiasFunction(const std::vector& inputShapes) : MatMulFunctionBase(inputShapes, MatMulType::MatMul) { OPENVINO_ASSERT(input_shapes.size() == 4, "Got invalid number of input shapes"); } protected: @@ -160,17 +180,65 @@ class TransposeMulMatMulBiasFunction : public SnippetsFunctionBase { // FQ[U8] / // MatMul // FQ[I8] -class MatMulsQuantizedSoftmaxFunction : public SnippetsFunctionBase { +class MatMulsQuantizedSoftmaxFunction : public MatMulFunctionBase { public: - explicit MatMulsQuantizedSoftmaxFunction(const std::vector& inputShapes, const std::vector& precisions) - : SnippetsFunctionBase(inputShapes), precisions(precisions) { + explicit MatMulsQuantizedSoftmaxFunction(const std::vector& inputShapes, + const std::vector& precisions, + MatMulType type) + : MatMulFunctionBase(inputShapes, type, precisions) { OPENVINO_ASSERT(input_shapes.size() == 3, "Got invalid number of input shapes"); - MatMulFunction::validate_precisions(precisions); } + + std::set get_constant_input_idces() const override { + return matmul_type == MatMulType::FullyConnected ? std::set{1, 2} : std::set{}; + } + protected: std::shared_ptr initOriginal() const override; +}; - std::vector precisions; +// MatMul +// | | +// | Eltwise chain +// \ / +// Add +class MatMulEltwiseChainFunction : public MatMulFunctionBase { +public: + explicit MatMulEltwiseChainFunction(const std::vector& inputShapes, + const std::vector& precisions, + MatMulType type) + : MatMulFunctionBase(inputShapes, type, precisions) { + OPENVINO_ASSERT(input_shapes.size() == 2, "Got invalid number of input shapes"); + } + +protected: + std::shared_ptr initOriginal() const override; +}; + +// MatMul +// | | +// | Eltwise chain +// \ / +// Add +// | +// MatMul +// | +// Eltwise chain +class MatMulEltwiseChainCascadeFunction : public MatMulFunctionBase { +public: + explicit MatMulEltwiseChainCascadeFunction(const std::vector& inputShapes, + const std::vector& precisions, + MatMulType type) + : MatMulFunctionBase(inputShapes, type, precisions) { + OPENVINO_ASSERT(input_shapes.size() == 3, "Got invalid number of input shapes"); + } + + std::set get_constant_input_idces() const override { + return matmul_type == MatMulType::FullyConnected ? 
std::set{1, 2} : std::set{}; + } + +protected: + std::shared_ptr initOriginal() const override; }; } // namespace snippets diff --git a/src/tests/ov_helpers/ov_snippets_models/src/snippets_helpers.cpp b/src/tests/ov_helpers/ov_snippets_models/src/snippets_helpers.cpp index ce70de34133016..7ec89ad562074b 100644 --- a/src/tests/ov_helpers/ov_snippets_models/src/snippets_helpers.cpp +++ b/src/tests/ov_helpers/ov_snippets_models/src/snippets_helpers.cpp @@ -9,14 +9,24 @@ namespace ov { namespace test { namespace snippets { +void SnippetsFunctionBase::validate_params_shape(const std::vector& input_shapes, + const ov::ParameterVector& params) { + OPENVINO_ASSERT(params.size() == input_shapes.size(), + "Passed input shapes and produced function are inconsistent: number of params mismatch. Expected: ", + input_shapes.size(), ", actual: ", params.size()); + for (size_t i = 0; i < input_shapes.size(); i++) { + const auto& cur_shape = params[i]->get_partial_shape(); + OPENVINO_ASSERT(input_shapes[i] == cur_shape, + "Passed input shapes (", input_shapes[i], + ") and produced function shape(", cur_shape, + ") are inconsistent."); + } +} + void SnippetsFunctionBase::validate_function(const std::shared_ptr &f) const { OPENVINO_ASSERT(f != nullptr, "The test requires Model to be defined"); const auto ¶ms = f->get_parameters(); - OPENVINO_ASSERT(params.size() == input_shapes.size(), - "Passed input shapes and produced function are inconsistent."); - for (size_t i = 0; i < input_shapes.size(); i++) - OPENVINO_ASSERT(std::equal(input_shapes[i].begin(), input_shapes[i].end(), params[i]->get_partial_shape().begin()), - "Passed input shapes and produced function are inconsistent."); + validate_params_shape(input_shapes, params); } SnippetsFunctionCustomizable::SnippetsFunctionCustomizable(const std::vector& inputShapes, diff --git a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_matmul.cpp b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_matmul.cpp index 5cd1eb47800d0b..2a7442cf255fc5 100644 --- a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_matmul.cpp +++ b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_matmul.cpp @@ -12,9 +12,81 @@ namespace ov { namespace test { namespace snippets { +namespace { +std::shared_ptr make_matmul_b_input(const ov::element::Type& precision, + const ov::PartialShape& shape, + MatMulType type, + ov::ParameterVector& params) { + std::shared_ptr result; + switch (type) { + case MatMulType::FullyConnected: + return ov::test::utils::make_constant(precision, shape.to_shape()); + case MatMulType::MatMul: { + auto param = std::make_shared(precision, shape); + params.push_back(param); + return param; + } + default: + OPENVINO_THROW("Unexpected MatMulType is passed in make_matmul_b_input"); + } +} + +std::shared_ptr make_fake_quantize(const ov::Output& in, bool signed_interval) { + static const float i8_fq_il = -35.0172004; + static const float i8_fq_ih = 34.7436294; + static const float u8_fq_il = 0; + static const float u8_fq_ih = 0.820726; + const auto low = signed_interval ? i8_fq_il : u8_fq_il; + const auto high = signed_interval ? 
i8_fq_ih : u8_fq_ih; + return ov::test::utils::make_fake_quantize(in, ov::element::f32, 256, {1}, {low}, {high}, {low}, {high}); +} +} // namespace + +std::ostream &operator<<(std::ostream& os, MatMulType type) { + switch (type) { + case MatMulType::MatMul: + return os << "MatMul"; + case MatMulType::FullyConnected: + return os << "FullyConnected"; + default: + OPENVINO_THROW("Unexpected MatMulType."); + } +} + +MatMulFunctionBase::MatMulFunctionBase(const std::vector& inputShapes, + MatMulType type, + const std::vector& precisions) + : SnippetsFunctionBase(inputShapes), + precisions(precisions), + matmul_type(type) { + if (!precisions.empty()) { + OPENVINO_ASSERT(precisions.size() == 2, "Got invalid number of input element types"); + const bool is_f32 = ov::snippets::utils::everyone_is(element::f32, precisions[0], precisions[1]); + const bool is_int8 = ov::snippets::utils::one_of(precisions[0], element::i8, element::u8) && precisions[1] == element::i8; + const bool is_bf16 = ov::snippets::utils::everyone_is(element::bf16, precisions[0], precisions[1]); + OPENVINO_ASSERT(is_f32 || is_bf16 || is_int8, "Invalid precisions"); + } +} + +void MatMulFunctionBase::validate_function(const std::shared_ptr &f) const { + OPENVINO_ASSERT(f != nullptr, "The test requires Model to be defined"); + const auto count_of_shapes = input_shapes.size(); + const auto idces_to_remove = get_constant_input_idces(); + OPENVINO_ASSERT(std::all_of(idces_to_remove.begin(), idces_to_remove.end(), [&count_of_shapes](size_t x) { return x < count_of_shapes; }), + "constant_input_idces must be less than input shapes size"); + + std::vector shapes_to_check; + for (size_t i = 0; i < input_shapes.size(); ++i) { + if (idces_to_remove.count(i) == 0) + shapes_to_check.push_back(input_shapes[i]); + } + SnippetsFunctionBase::validate_params_shape(shapes_to_check, f->get_parameters()); +} + std::shared_ptr MatMulFunction::initOriginal() const { auto data0 = std::make_shared(precisions[0], input_shapes[0]); - auto data1 = std::make_shared(precisions[1], input_shapes[1]); + ov::ParameterVector params{data0}; + auto data1 = make_matmul_b_input(precisions[1], input_shapes[1], matmul_type, params); std::shared_ptr matmul; if (precisions[1] == ov::element::i8) { matmul = std::make_shared>( @@ -26,11 +98,12 @@ std::shared_ptr MatMulFunction::initOriginal() const { } else { matmul = std::make_shared(data0, data1, false, transpose_b); } - return std::make_shared(NodeVector{matmul}, ParameterVector{data0, data1}); + return std::make_shared(NodeVector{matmul}, params); } std::shared_ptr MatMulFunction::initReference() const { auto data0 = std::make_shared(precisions[0], input_shapes[0]); - auto data1 = std::make_shared(precisions[1], input_shapes[1]); + ov::ParameterVector params{data0}; + auto data1 = make_matmul_b_input(precisions[1], input_shapes[1], matmul_type, params); auto indata0 = std::make_shared(precisions[0], data0->get_output_partial_shape(0)); auto indata1 = std::make_shared(precisions[1], data1->get_output_partial_shape(0)); std::shared_ptr matmul; @@ -47,22 +120,20 @@ std::shared_ptr MatMulFunction::initReference() const { const auto subgraph = std::make_shared(NodeVector{data0, data1}, std::make_shared(NodeVector{matmul}, ParameterVector{indata0, indata1})); - return std::make_shared(NodeVector{subgraph}, ParameterVector{data0, data1}); + return std::make_shared(NodeVector{subgraph}, params); } std::shared_ptr FQMatMulFunction::initOriginal() const { auto const_order = std::make_shared(ov::element::i32, Shape {4}, 
@@ -47,22 +120,20 @@ std::shared_ptr<ov::Model> MatMulFunction::initReference() const {
     const auto subgraph = std::make_shared<ov::snippets::op::Subgraph>(NodeVector{data0, data1},
                                                                        std::make_shared<ov::Model>(NodeVector{matmul}, ParameterVector{indata0, indata1}));
-    return std::make_shared<ov::Model>(NodeVector{subgraph}, ParameterVector{data0, data1});
+    return std::make_shared<ov::Model>(NodeVector{subgraph}, params);
 }
 
 std::shared_ptr<ov::Model> FQMatMulFunction::initOriginal() const {
     auto const_order = std::make_shared<ov::op::v0::Constant>(ov::element::i32, Shape {4}, std::vector<int>{0, 2, 1, 3});
     auto data0 = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[0]);
-    auto ih = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1}, std::vector<float>{34.7436294});
-    auto il = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1}, std::vector<float>{-35.0172004});
-    auto oh = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1}, std::vector<float>{34.7436294});
-    auto ol = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1}, std::vector<float>{-35.0172004});
-    auto fq = std::make_shared<ov::op::v0::FakeQuantize>(data0, il, ih, ol, oh, 256);
-    std::shared_ptr<ov::Node> in0 = fq;
+    ParameterVector params{data0};
+
+    auto in0 = make_fake_quantize(data0, true);
     if (pos == 0) {
         in0 = std::make_shared<ov::op::v1::Transpose>(in0, const_order);
     }
-    auto constant = ov::test::utils::make_constant(ov::element::i8, const_shape.get_shape());
-    auto convert = std::make_shared<ov::op::v0::Convert>(constant, ov::element::f32);
+
+    auto data1 = make_matmul_b_input(ov::element::i8, input_shapes[1], matmul_type, params);
+    auto convert = std::make_shared<ov::op::v0::Convert>(data1, ov::element::f32);
     auto deq_mul = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1}, std::vector<float>{0.00499185826});
     auto mul = std::make_shared<ov::op::v1::Multiply>(convert, deq_mul);
     std::shared_ptr<ov::Node> in1 = mul;
@@ -74,12 +145,15 @@ std::shared_ptr<ov::Model> FQMatMulFunction::initOriginal() const {
     if (pos == 2) {
         out = std::make_shared<ov::op::v1::Transpose>(out, const_order);
     }
-    return std::make_shared<ov::Model>(NodeVector{out}, ParameterVector{data0});
+    return std::make_shared<ov::Model>(NodeVector{out}, params);
 }
 
 std::shared_ptr<ov::Model> MatMulBiasFunction::initOriginal() const {
     auto data0 = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[0]);
-    auto data1 = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[1]);
+    ParameterVector params{data0};
+    auto data1 = make_matmul_b_input(precision, input_shapes[1], matmul_type, params);
     auto data2 = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[2]);
+    params.push_back(data2);
+
     std::shared_ptr<ov::Node> matmul;
     if (precisions[1] == ov::element::i8) {
         matmul = std::make_shared<op::TypeRelaxed<op::v0::MatMul>>(
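In the FullyConnected flavor, the bias pattern that MatMulBiasFunction builds reduces to a MatMul with constant weights followed by an Add. A compact core-op rendering of that graph is sketched below; make_fc_bias_model is a hypothetical helper name and the shapes are illustrative, not the test's real shapes.

    #include <memory>
    #include <vector>
    #include <openvino/core/model.hpp>
    #include <openvino/op/add.hpp>
    #include <openvino/op/constant.hpp>
    #include <openvino/op/matmul.hpp>
    #include <openvino/op/parameter.hpp>

    std::shared_ptr<ov::Model> make_fc_bias_model() {
        // The activation and the bias stay Parameters; the weights sit on a constant path,
        // which is what makes the MatMul FullyConnected-like for tokenization purposes.
        auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 1, 64, 16});
        auto weights = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{16, 32}, std::vector<float>(16 * 32, 0.5f));
        auto matmul = std::make_shared<ov::op::v0::MatMul>(data, weights);  // [1, 1, 64, 32]
        auto bias = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 1, 64, 32});
        auto add = std::make_shared<ov::op::v1::Add>(matmul, bias);
        return std::make_shared<ov::Model>(ov::NodeVector{add}, ov::ParameterVector{data, bias});
    }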
@@ -91,32 +165,36 @@ std::shared_ptr<ov::Model> MatMulBiasFunction::initOriginal() const {
         matmul = std::make_shared<op::v0::MatMul>(data0, data1);
     }
     auto bias = std::make_shared<ov::op::v1::Add>(matmul, data2);
-    return std::make_shared<ov::Model>(NodeVector{bias}, ParameterVector{data0, data1, data2});
+    return std::make_shared<ov::Model>(NodeVector{bias}, params);
 }
 
 std::shared_ptr<ov::Model> MatMulBiasQuantizedFunction::initOriginal() const {
     auto data0 = std::make_shared<ov::op::v0::Parameter>(precisions[0], input_shapes[0]);
-    auto data1 = std::make_shared<ov::op::v0::Parameter>(precisions[1], input_shapes[1]);
+    ParameterVector params{data0};
+    auto data1 = make_matmul_b_input(precisions[1], input_shapes[1], matmul_type, params);
     auto data2 = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[2]);
+    params.push_back(data2);
+
     auto matmul = std::make_shared<op::TypeRelaxed<op::v0::MatMul>>(
             std::vector<ov::element::Type>{ov::element::f32, element::f32},
             std::vector<ov::element::Type>{element::f32},
             ov::op::TemporaryReplaceOutputType(data0, element::f32).get(),
             ov::op::TemporaryReplaceOutputType(data1, element::f32).get());
-    auto fq2 = ov::test::utils::make_fake_quantize(matmul, ov::element::f32, 256, {1}, {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+    auto fq2 = make_fake_quantize(matmul, true);
     auto bias = std::make_shared<ov::op::v1::Add>(fq2, data2);
-    return std::make_shared<ov::Model>(NodeVector{bias}, ParameterVector{data0, data1, data2});
+    return std::make_shared<ov::Model>(NodeVector{bias}, params);
 }
 
 std::shared_ptr<ov::Model> MatMulsQuantizedFunction::initOriginal() const {
     auto data0 = std::make_shared<ov::op::v0::Parameter>(precisions[0], input_shapes[0]);
-    auto data1 = std::make_shared<ov::op::v0::Parameter>(precisions[1], input_shapes[1]);
-    auto data2 = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[2]);
+    ParameterVector params{data0};
+    auto data1 = make_matmul_b_input(precisions[1], input_shapes[1], matmul_type, params);
+    auto data2 = make_matmul_b_input(precision, input_shapes[2], matmul_type, params);
     auto matmul0 = std::make_shared<op::TypeRelaxed<op::v0::MatMul>>(
             std::vector<ov::element::Type>{ov::element::f32, element::f32},
             std::vector<ov::element::Type>{element::f32},
             ov::op::TemporaryReplaceOutputType(data0, element::f32).get(),
             ov::op::TemporaryReplaceOutputType(data1, element::f32).get());
-    auto fq0 = ov::test::utils::make_fake_quantize(matmul0, ov::element::f32, 256, {1}, {0}, {0.820726}, {0}, {0.820726});
-    auto fq2 = ov::test::utils::make_fake_quantize(data2, ov::element::f32, 256, {1}, {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+    auto fq0 = make_fake_quantize(matmul0, false);
+    auto fq2 = make_fake_quantize(data2, true);
     auto new_shape = std::make_shared<ov::op::v0::Constant>(ov::element::u64, ov::Shape{4}, std::vector<size_t>{1, 1, input_shapes[2].get_shape()[0], input_shapes[2].get_shape()[1]});
     auto reshape = std::make_shared<ov::op::v1::Reshape>(fq2, new_shape, false);
@@ -125,19 +203,13 @@ std::shared_ptr<ov::Model> MatMulsQuantizedFunction::initOriginal() const
             std::vector<ov::element::Type>{element::f32},
             ov::op::TemporaryReplaceOutputType(fq0, element::f32).get(),
             ov::op::TemporaryReplaceOutputType(reshape, element::f32).get());
-    auto fq3 = ov::test::utils::make_fake_quantize(matmul1,
-                                                   ov::element::f32,
-                                                   256,
-                                                   {1},
-                                                   {-35.0172004},
-                                                   {34.7436294},
-                                                   {-35.0172004},
-                                                   {34.7436294});
-    return std::make_shared<ov::Model>(NodeVector{fq3}, ParameterVector{data0, data1, data2});
+    auto fq3 = make_fake_quantize(matmul1, true);
+    return std::make_shared<ov::Model>(NodeVector{fq3}, params);
 }
 
 std::shared_ptr<ov::Model> Transpose0213MatMulFunction::initOriginal() const {
     auto data0 = std::make_shared<ov::op::v0::Parameter>(precisions[0], input_shapes[0]);
-    auto data1 = std::make_shared<ov::op::v0::Parameter>(precisions[1], input_shapes[1]);
+    ParameterVector params{data0};
+    auto data1 = make_matmul_b_input(precisions[1], input_shapes[1], matmul_type, params);
     auto const_order = std::make_shared<ov::op::v0::Constant>(ov::element::i32, Shape {4}, std::vector<int>{0, 2, 1, 3});
     std::shared_ptr<ov::Node> result;
     switch (transpose_position) {
@@ -180,7 +252,7 @@ std::shared_ptr<ov::Model> Transpose0213MatMulFunction::initOriginal() const {
             break;
         }
     }
-    return std::make_shared<ov::Model>(NodeVector{result}, ParameterVector{data0, data1});
+    return std::make_shared<ov::Model>(NodeVector{result}, params);
 }
 
 std::shared_ptr<ov::Model> TransposeMatMulFunction::initOriginal() const {
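Likewise, the Transpose0213MatMul builders cover the pattern where the activation is permuted with order {0, 2, 1, 3} before the MatMul. A minimal core-op sketch of that pattern in its FullyConnected flavor is shown below; make_transpose0213_matmul is a hypothetical name and the shapes are illustrative.

    #include <memory>
    #include <vector>
    #include <openvino/core/model.hpp>
    #include <openvino/op/constant.hpp>
    #include <openvino/op/matmul.hpp>
    #include <openvino/op/parameter.hpp>
    #include <openvino/op/transpose.hpp>

    std::shared_ptr<ov::Model> make_transpose0213_matmul() {
        // [1, 16, 2, 64] -> Transpose(0, 2, 1, 3) -> [1, 2, 16, 64], then MatMul with constant weights.
        auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 16, 2, 64});
        auto order = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{4}, std::vector<int32_t>{0, 2, 1, 3});
        auto transpose = std::make_shared<ov::op::v1::Transpose>(data, order);
        auto weights = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{64, 32}, std::vector<float>(64 * 32, 0.1f));
        auto matmul = std::make_shared<ov::op::v0::MatMul>(transpose, weights);  // [1, 2, 16, 32]
        return std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{data});
    }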
@@ -215,16 +287,17 @@ std::shared_ptr<ov::Model> TransposeMulMatMulBiasFunction::initOriginal() const
 }
 
 std::shared_ptr<ov::Model> MatMulsQuantizedSoftmaxFunction::initOriginal() const {
     auto data0 = std::make_shared<ov::op::v0::Parameter>(precisions[0], input_shapes[0]);
-    auto data1 = std::make_shared<ov::op::v0::Parameter>(precisions[1], input_shapes[1]);
-    auto data2 = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[2]);
+    ParameterVector params{data0};
+    auto data1 = make_matmul_b_input(precisions[1], input_shapes[1], matmul_type, params);
+    auto data2 = make_matmul_b_input(precision, input_shapes[2], matmul_type, params);
     auto matmul0 = std::make_shared<op::TypeRelaxed<op::v0::MatMul>>(
             std::vector<ov::element::Type>{ov::element::f32, element::f32},
             std::vector<ov::element::Type>{element::f32},
             ov::op::TemporaryReplaceOutputType(data0, element::f32).get(),
             ov::op::TemporaryReplaceOutputType(data1, element::f32).get());
     auto softmax = std::make_shared<ov::op::v8::Softmax>(matmul0, -1);
-    auto fq0 = ov::test::utils::make_fake_quantize(softmax, ov::element::f32, 256, {1}, {0}, {0.820726}, {0}, {0.820726});
-    auto fq2 = ov::test::utils::make_fake_quantize(data2, ov::element::f32, 256, {1}, {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294});
+    auto fq0 = make_fake_quantize(softmax, false);
+    auto fq2 = make_fake_quantize(data2, true);
     auto new_shape = std::make_shared<ov::op::v0::Constant>(ov::element::u64, ov::Shape{4}, std::vector<size_t>{1, 1, input_shapes[2].get_shape()[0], input_shapes[2].get_shape()[1]});
     auto reshape = std::make_shared<ov::op::v1::Reshape>(fq2, new_shape, false);
@@ -233,15 +306,71 @@ std::shared_ptr<ov::Model> MatMulsQuantizedSoftmaxFunction::initOriginal() const
             std::vector<ov::element::Type>{element::f32},
             ov::op::TemporaryReplaceOutputType(fq0, element::f32).get(),
             ov::op::TemporaryReplaceOutputType(reshape, element::f32).get());
-    auto fq3 = ov::test::utils::make_fake_quantize(matmul1,
-                                                   ov::element::f32,
-                                                   256,
-                                                   {1},
-                                                   {-35.0172004},
-                                                   {34.7436294},
-                                                   {-35.0172004},
-                                                   {34.7436294});
-    return std::make_shared<ov::Model>(NodeVector{fq3}, ParameterVector{data0, data1, data2});
+    auto fq3 = make_fake_quantize(matmul1, true);
+    return std::make_shared<ov::Model>(NodeVector{fq3}, params);
+}
+
+std::shared_ptr<ov::Model> MatMulEltwiseChainFunction::initOriginal() const {
+    auto data0 = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[0]);
+    ParameterVector params{data0};
+    auto data1 = make_matmul_b_input(precision, input_shapes[1], matmul_type, params);
+
+    const auto matmul = std::make_shared<op::TypeRelaxed<op::v0::MatMul>>(
+            std::vector<ov::element::Type>{element::f32, element::f32},
+            std::vector<ov::element::Type>{element::f32},
+            ov::op::TemporaryReplaceOutputType(data0, element::f32).get(),
+            ov::op::TemporaryReplaceOutputType(data1, element::f32).get());
+
+    auto scale = ov::test::utils::make_constant(precision, {});
+    auto mul = std::make_shared<ov::op::v1::Multiply>(matmul, scale);
+
+    ov::Shape bias_shape(matmul->get_output_partial_shape(0).size(), 1);
+    auto OC = *matmul->get_output_partial_shape(0).rbegin();
+    if (OC.is_static())
+        bias_shape.back() = OC.get_length();
+    auto bias = ov::test::utils::make_constant(precision, bias_shape);
+    auto bias_op = std::make_shared<ov::op::v1::Add>(mul, bias);
+
+    auto add = std::make_shared<ov::op::v1::Add>(matmul, bias_op);
+    return std::make_shared<ov::Model>(NodeVector{add}, params);
+}
+
+std::shared_ptr<ov::Model> MatMulEltwiseChainCascadeFunction::initOriginal() const {
+    auto data0 = std::make_shared<ov::op::v0::Parameter>(precision, input_shapes[0]);
+    ParameterVector params{data0};
+    auto data1 = make_matmul_b_input(precision, input_shapes[1], matmul_type, params);
+    auto data2 = make_matmul_b_input(precision, input_shapes[2], matmul_type, params);
+
+    const auto matmul1 = std::make_shared<op::TypeRelaxed<op::v0::MatMul>>(
+            std::vector<ov::element::Type>{element::f32, element::f32},
+            std::vector<ov::element::Type>{element::f32},
+            ov::op::TemporaryReplaceOutputType(data0, element::f32).get(),
+            ov::op::TemporaryReplaceOutputType(data1, element::f32).get());
+
+    auto build_eltwise_chain = [&](const ov::Output<ov::Node>& out) {
+        auto scale = ov::test::utils::make_constant(precision, {});
+        auto mul = std::make_shared<ov::op::v1::Multiply>(out, scale);
+
+        ov::Shape bias_shape(out.get_partial_shape().size(), 1);
+        auto OC = *out.get_partial_shape().rbegin();
+        if (OC.is_static())
+            bias_shape.back() = OC.get_length();
+        auto bias = ov::test::utils::make_constant(precision, bias_shape);
+        auto bias_op = std::make_shared<ov::op::v1::Add>(mul, bias);
+        return bias_op;
+    };
+
+    auto eltwise_chain_1 = build_eltwise_chain(matmul1);
+    auto add = std::make_shared<ov::op::v1::Add>(matmul1, eltwise_chain_1);
+
+    const auto matmul2 = std::make_shared<op::TypeRelaxed<op::v0::MatMul>>(
+            std::vector<ov::element::Type>{element::f32, element::f32},
+            std::vector<ov::element::Type>{element::f32},
+            ov::op::TemporaryReplaceOutputType(add, element::f32).get(),
+            ov::op::TemporaryReplaceOutputType(data2, element::f32).get());
+
+    auto eltwise_chain_2 = build_eltwise_chain(matmul2);
+    return std::make_shared<ov::Model>(NodeVector{eltwise_chain_2}, params);
 }
 }  // namespace snippets
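The new MatMulEltwiseChainFunction attaches a scalar Multiply, a per-channel Add and then a residual Add back onto the MatMul output; the cascade variant repeats the same chain after a second MatMul. A core-op sketch of the single-chain case is given below; make_matmul_eltwise_chain is a hypothetical name, the shapes and constant values are illustrative, and a plain MatMul stands in for the TypeRelaxed one used in the test builder.

    #include <memory>
    #include <vector>
    #include <openvino/core/model.hpp>
    #include <openvino/op/add.hpp>
    #include <openvino/op/constant.hpp>
    #include <openvino/op/matmul.hpp>
    #include <openvino/op/multiply.hpp>
    #include <openvino/op/parameter.hpp>

    std::shared_ptr<ov::Model> make_matmul_eltwise_chain() {
        auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 1, 64, 16});
        auto weights = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{16, 32}, std::vector<float>(16 * 32, 0.5f));
        auto matmul = std::make_shared<ov::op::v0::MatMul>(data, weights);  // [1, 1, 64, 32]

        // Eltwise chain: a scalar scale, then a bias broadcast over the last (OC) dimension.
        auto scale = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, std::vector<float>{0.125f});
        auto mul = std::make_shared<ov::op::v1::Multiply>(matmul, scale);
        auto bias = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 1, 1, 32}, std::vector<float>(32, 1.f));
        auto bias_op = std::make_shared<ov::op::v1::Add>(mul, bias);

        // The chain result is added back onto the raw MatMul output, mirroring MatMulEltwiseChainFunction.
        auto add = std::make_shared<ov::op::v1::Add>(matmul, bias_op);
        return std::make_shared<ov::Model>(ov::NodeVector{add}, ov::ParameterVector{data});
    }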