Xp/decompse matmul split or matmul gather #25196
base: master
Changes from 27 commits
I understand that this transformation is trying to match a very specific pattern from LLM models, but shouldn't we have some heuristic for the weight sizes or something?
I mean, do we expect any model with any weight sizes to benefit from this transformation?
Also, please describe in the commit message / PR description the motivation for having this transformation, i.e. why we expect it to speed up LLMs in the first place.
Yes, I want to match models with a ViT-like structure, and I also added some heuristics: checking the rank, decompose_num, and a specific transpose order. Do you think these are not enough?
Probably this will be enough most of the time.
But, again, this is mostly about the reason we are getting the speed-ups.
I assume we observe speed-ups not because of the ranks, decompose_num and transpose order, but because we become less memory-bound. But maybe I am wrong.
Yes, I think it is related to the input data size, but since the shapes are dynamic it is hard to express that as a precise condition; I just tried my best.
It looks like an unnecessary check: we already check that while filling the gathers vector.
Actually, it also checks indices_val[0], covering the 3 cases (0, 1, 2).
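The check being debated could be sketched roughly like this. This is a self-contained illustration, not the PR's actual code: the function name and the idea that the first index of each Gather must together cover exactly {0, 1, 2} (the three decomposed Q/K/V slices) are assumptions drawn from the comment above.

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical sketch: verify that the scalar indices collected from the
// matched Gather nodes (indices_val[0] of each) cover exactly the three
// expected slices 0, 1 and 2 — i.e. the Gathers together pick all three
// parts of the decomposed MatMul output, each exactly once.
bool indices_cover_all_three(const std::vector<int64_t>& first_indices) {
    if (first_indices.size() != 3)
        return false;
    std::vector<int64_t> sorted = first_indices;
    std::sort(sorted.begin(), sorted.end());
    return sorted == std::vector<int64_t>{0, 1, 2};
}
```

If the loop that fills the gathers vector already guarantees this property, the reviewer's point stands that a second check is redundant; if it only validates each index in isolation, a coverage check like this still adds information.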
Still don't understand this check: I don't see any indices-related check in this any_of, and as for the type check, that was already done a few lines above.
We need to check somewhere above that matmul->get_transpose_a() == false.
I don't think so.
1: The real cases do not include transpose_a being true.
2: It makes the code more complicated.
Although there are no known cases where such patterns have transpose_a == true, it doesn't mean there will be no such cases in the future (not necessarily cases from real models; we should also take into account synthetic cases, e.g. from tests). And I would prefer to avoid debugging a MatMul shape-inference failure if such patterns do appear. Moreover, this is a one-line check that can be done right after the matmul variable is created: almost no code is spent on that. So please do that.