diff --git a/src/common/snippets/include/snippets/pass/fc_tokenization.hpp b/src/common/snippets/include/snippets/pass/fc_tokenization.hpp new file mode 100644 index 00000000000000..40505607341ba4 --- /dev/null +++ b/src/common/snippets/include/snippets/pass/fc_tokenization.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/matcher_pass.hpp" +#include "snippets/pass/tokenization.hpp" + +namespace ov { +namespace snippets { +namespace pass { + +/** + * @interface TokenizeFCSnippets + * @brief The pass tokenizes FullyConnected like (with constant path on B input) MatMuls + * @ingroup snippets + */ +class TokenizeFCSnippets: public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("TokenizeFCSnippets", "0"); + TokenizeFCSnippets(const SnippetsTokenization::Config& config); +}; + +} // namespace pass +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/src/pass/fc_tokenization.cpp b/src/common/snippets/src/pass/fc_tokenization.cpp new file mode 100644 index 00000000000000..4515973fccb0f3 --- /dev/null +++ b/src/common/snippets/src/pass/fc_tokenization.cpp @@ -0,0 +1,35 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/fc_tokenization.hpp" + +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "snippets/itt.hpp" +#include "snippets/op/subgraph.hpp" + +ov::snippets::pass::TokenizeFCSnippets::TokenizeFCSnippets(const SnippetsTokenization::Config& config) { + MATCHER_SCOPE(TokenizeFCSnippets); + + // TODO: extend constant path coverage: + // 1. Add u8/i8/bf16 precisions + // 2. Add subgraphs (Transpose/Convert) + // 3. Add Decompression subgraphs support (and all the possible compressed weights related precisions) + auto constant = ov::pass::pattern::wrap_type(ov::pass::pattern::type_matches(ov::element::f32)); + auto m_matmul = ov::pass::pattern::wrap_type({ov::pass::pattern::any_input(), constant}); + + register_matcher(std::make_shared(m_matmul, matcher_name), + [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher &m) { + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::TokenizeFCSnippets") + const auto& pattern_map = m.get_pattern_value_map(); + const auto matmul = pattern_map.at(m_matmul).get_node_shared_ptr(); + const auto subgraph = op::Subgraph::wrap_node_as_subgraph(matmul); + subgraph->get_rt_info()["originalLayersNames"] = matmul->get_friendly_name(); + // MatMul weights are stored outside the subgraph + subgraph->set_virtual_port_count(1); + op::update_out_tensor_name(subgraph); + ov::replace_node(matmul, subgraph); + return true; + }); +} diff --git a/src/common/snippets/src/pass/tokenization.cpp b/src/common/snippets/src/pass/tokenization.cpp index 43733fc196ee83..600db8cbde0fce 100644 --- a/src/common/snippets/src/pass/tokenization.cpp +++ b/src/common/snippets/src/pass/tokenization.cpp @@ -2,16 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/itt.hpp" +#include "snippets/pass/tokenization.hpp" +#include "openvino/pass/graph_rewrite.hpp" #include "openvino/pass/manager.hpp" -#include "snippets/pass/tokenization.hpp" +#include "snippets/itt.hpp" +#include "snippets/pass/collapse_subgraph.hpp" #include "snippets/pass/common_optimizations.hpp" #include "snippets/pass/extract_reshapes_from_mha.hpp" -#include "snippets/pass/mha_tokenization.hpp" +#include "snippets/pass/fc_tokenization.hpp" #include "snippets/pass/gn_tokenization.hpp" -#include "snippets/pass/collapse_subgraph.hpp" - +#include "snippets/pass/mha_tokenization.hpp" namespace ov { namespace snippets { @@ -81,9 +82,15 @@ bool SnippetsTokenization::run_on_model(const std::shared_ptr& m) { manager.register_pass(); manager.register_pass(); + // This pass mustn't be registered in GraphRewrite with other tokenization passes + // since it changes the nodes after the matched root node manager.register_pass(m_config); - manager.register_pass(); - manager.register_pass(m_config); + + auto tokenization_passes = manager.register_pass(); + tokenization_passes->add_matcher(); + tokenization_passes->add_matcher(m_config); + tokenization_passes->add_matcher(m_config); + manager.register_pass(m_config); manager.run_passes(m); diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index fd31d6b2fa54a3..06341f953987c2 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -142,6 +142,7 @@ // Snippets #include "snippets/pass/tokenization.hpp" #include "snippets/pass/mha_tokenization.hpp" +#include "snippets/pass/fc_tokenization.hpp" #include "snippets/pass/collapse_subgraph.hpp" #include "snippets/pass/common_optimizations.hpp" #include "snippets/pass/split_dimension_m.hpp" @@ -928,6 +929,8 @@ void Transformations::MainSnippets(void) { CPU_REGISTER_PASS_ARM(snippetsManager, SnippetsMarkSkipped); #else CPU_REGISTER_PASS_X64(snippetsManager, SnippetsMarkSkipped, inferencePrecision == ov::element::bf16); + // TODO: remove + CPU_DISABLE_PASS_COMMON(snippetsManager, snippets::pass::TokenizeFCSnippets); #endif } CPU_REGISTER_PASS_X64(snippetsManager, snippets::pass::SnippetsTokenization, tokenization_config);