[Snippets] Introduced BufferExpression (#26413)

### Details: - *Created the first specialized expression, `BufferExpression`: the source node no longer has to be cast to `op::Buffer` to get its attributes, since all of them are now stored in `BufferExpression`* - *Merged the `NewMemoryBuffer` and `IntermediateMemoryBuffer` ops into a single `Buffer` op, since they are now distinguished only by argument count* - *Replaced the pass "SetBrgemmCopyBBuffersShape" with the new pass "InsertBrgemmCopyBBuffers", which inserts a `BrgemmCopyB`-specific `BufferExpression` after this op* ### Tickets: - *151198*
openvinotoolkit · Sep 17, 2024 · 0217615 · 0217615
1 parent 3cefe70
commit 0217615
Show file tree

Hide file tree

Showing 61 changed files with 1,215 additions and 926 deletions.
diff --git a/src/common/snippets/include/snippets/lowered/expression.hpp b/src/common/snippets/include/snippets/lowered/expression.hpp
@@ -17,15 +17,18 @@ namespace ov {
 namespace snippets {
 namespace lowered {
 
+class ExpressionFactory;
 class LinearIR;
 using ExpressionPtr = std::shared_ptr<Expression>;
 using ExpressionMap = std::unordered_map<Expression*, ExpressionPtr>;
 class Expression : public std::enable_shared_from_this<Expression> {
     friend class LinearIR;
+    friend class ExpressionFactory;
     friend class ExpressionPort;
 
 public:
     Expression() = default;
+    virtual ~Expression() = default;
 
     std::shared_ptr<Node> get_node() const;
     std::shared_ptr<Emitter> get_emitter() const;
@@ -50,7 +53,8 @@ class Expression : public std::enable_shared_from_this<Expression> {
 
     void set_input_port_connector(size_t port, PortConnectorPtr to);
 
-    void validate() const;
+    // Attention! Cannot be called in ctor because this method validates port attributes (descs, connectors)
+    virtual void validate() const;
 
     ExpressionPort get_input_port(size_t i);
     ExpressionPort get_output_port(size_t i);
@@ -61,16 +65,52 @@ class Expression : public std::enable_shared_from_this<Expression> {
     bool needShapeInfer() const { return m_need_shape_infer; }
     const std::vector<size_t>& get_loop_ids() const;
     void set_loop_ids(const std::vector<size_t>& loops);
-    ExpressionPtr clone_with_new_inputs(const std::vector<PortConnectorPtr>& new_inputs,
-                                        const std::shared_ptr<Node>& new_node) const;
+
+    /**
+     * @brief Clone Expression with new node and input port attributes.
+     *        Output port descriptors will be cloned from the current expression.
+     *        Output port connectors will be created.
+     * @param new_node new node
+     * @param new_inputs new input port connectors
+     * @param new_in_descs new input port descriptors. If this collection is empty,
+     *                     descriptors will be copied from the current expression
+     * @return the copy
+     */
+    ExpressionPtr clone_with_new_inputs(const std::shared_ptr<Node>& new_node, const std::vector<PortConnectorPtr>& new_inputs,
+                                        const std::vector<PortDescriptorPtr>& new_in_descs = {}) const;
+    /**
+     * @brief Clone Expression with new node using `expr_map` to connect to new parent expressions.
+     * @param expr_map the map with the original and cloned expressions
+     * @param new_node new node
+     * @return the copy
+     */
     ExpressionPtr clone_with_new_inputs(const ExpressionMap& expr_map, const std::shared_ptr<Node>& new_node) const;
 
+    virtual bool visit_attributes(AttributeVisitor &visitor);
+
+    // Note that get_type_info_static and get_type_info are needed to mimic OPENVINO_RTTI interface,
+    // so the standard OPENVINO_RTTI(...) macros could be used in derived classes.
+    _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() {
+        static ::ov::DiscreteTypeInfo type_info_static {"Expression"};
+        type_info_static.hash();
+        return type_info_static;
+    }
+
+    virtual const DiscreteTypeInfo& get_type_info() const {
+        return get_type_info_static();
+    }
+
+    const char* get_type_name() const {
+        return get_type_info().name;
+    }
+
 protected:
-    Expression(const Expression& other);
     // Note: The constructor initialization is private since an expression can be created only by Linear IR.
     //       The method must be used only by Linear IR builder of expressions!
     Expression(const std::shared_ptr<Node>& n, const std::shared_ptr<IShapeInferSnippetsFactory>& factory, bool need_shape_infer = true);
-    void update_node_and_connectors(const std::vector<PortConnectorPtr>& new_inputs, const std::shared_ptr<Node>& new_node);
+
+    // Virtual clone method which is called in clone_with_new_inputs with common logic
+    virtual ExpressionPtr clone() const;
 
     std::shared_ptr<Node> m_source_node{nullptr};
     std::shared_ptr<Emitter> m_emitter{nullptr};

diff --git a/src/common/snippets/include/snippets/lowered/expression_factory.hpp b/src/common/snippets/include/snippets/lowered/expression_factory.hpp
@@ -4,65 +4,72 @@
 
 #pragma once
 
-#include "linear_ir.hpp"
+#include "expression.hpp"
+#include "expressions/buffer_expression.hpp"
 
-#include "snippets/snippets_isa.hpp"
+#include "snippets/op/loop.hpp"
+#include "snippets/op/buffer.hpp"
+#include "snippets/op/perf_count.hpp"
 
 namespace ov {
 namespace snippets {
 namespace lowered {
 
-class LinearIR::ExpressionFactory {
+class ExpressionFactory {
 public:
-    template<class... Args>
-    static ExpressionPtr build(const std::shared_ptr<Node>& n, Args&&... params) {
-        if (const auto par = ov::as_type_ptr<ov::op::v0::Parameter>(n)) {
-            return create(par, params...);
-        } else if (const auto res = ov::as_type_ptr<ov::op::v0::Result>(n)) {
-            return create(res, params...);
-        } else if (const auto loop_begin = ov::as_type_ptr<op::LoopBegin>(n)) {
-            return create(loop_begin, params...);
-        } else if (const auto loop_end = ov::as_type_ptr<op::LoopEnd>(n)) {
-            return create(loop_end, params...);
-#ifdef SNIPPETS_DEBUG_CAPS
-        } else if (const auto perf_counter = ov::as_type_ptr<op::PerfCountBeginBase>(n)) {
-            return create(perf_counter, params...);
-        } else if (const auto perf_counter = ov::as_type_ptr<op::PerfCountEndBase>(n)) {
-            return create(perf_counter, params...);
-#endif
-        }
-        return create(n, params...);
+    ExpressionFactory(std::shared_ptr<IShapeInferSnippetsFactory> shape_infer_factory)
+        : m_shape_infer_factory(std::move(shape_infer_factory)) {}
+
+    template <typename T = Expression, typename... Args,
+              typename std::enable_if<std::is_base_of<Expression, T>::value, bool>::type = true>
+    std::shared_ptr<T> build(const std::shared_ptr<Node>& n, const std::vector<PortConnectorPtr>& inputs, Args... args) {
+        return create<T>(n, inputs, m_shape_infer_factory, args...);
     }
 
 private:
-    /* -- Default Builders - initialize input port connectors from parents and create new output port connectors themselves */
-    static ExpressionPtr create(const std::shared_ptr<ov::op::v0::Parameter>& par, const LinearIR& linear_ir);
-    static ExpressionPtr create(const std::shared_ptr<ov::op::v0::Result>& res, const LinearIR& linear_ir);
-    static ExpressionPtr create(const std::shared_ptr<ov::Node>& n, const LinearIR& linear_ir);
-
-    /* -- Input Builders - get input port connectors from method parameters and create new output port connectors themselves */
-    static ExpressionPtr create(const std::shared_ptr<op::LoopBegin>& n, const std::vector<PortConnectorPtr>& inputs, const LinearIR& linear_ir);
-    static ExpressionPtr create(const std::shared_ptr<op::LoopEnd>& n, const std::vector<PortConnectorPtr>& inputs, const LinearIR& linear_ir);
-    static ExpressionPtr create(const std::shared_ptr<ov::Node>& n, const std::vector<PortConnectorPtr>& inputs, const LinearIR& linear_ir);
+    static ExpressionPtr create(const std::shared_ptr<ov::op::v0::Parameter>& par, const std::vector<PortConnectorPtr>& inputs,
+                                const std::shared_ptr<IShapeInferSnippetsFactory>& shape_infer_factory);
+    static ExpressionPtr create(const std::shared_ptr<ov::op::v0::Result>& res, const std::vector<PortConnectorPtr>& inputs,
+                                const std::shared_ptr<IShapeInferSnippetsFactory>& shape_infer_factory);
+    static ExpressionPtr create(const std::shared_ptr<op::LoopBegin>& n, const std::vector<PortConnectorPtr>& inputs,
+                                const std::shared_ptr<IShapeInferSnippetsFactory>& shape_infer_factory);
+    static ExpressionPtr create(const std::shared_ptr<op::LoopEnd>& n, const std::vector<PortConnectorPtr>& inputs,
+                                const std::shared_ptr<IShapeInferSnippetsFactory>& shape_infer_factory);
 
     // Note: PerfCountBegin nodes have a PerfCountEnd ov::Output, but corresponding expression should not have any outputs to avoid register allocation
 #ifdef SNIPPETS_DEBUG_CAPS
-    static ExpressionPtr create(const std::shared_ptr<op::PerfCountBeginBase>& n,
-                                                   const std::vector<PortConnectorPtr>& inputs,
-                                                   const LinearIR& linear_ir);
-    static ExpressionPtr create(const std::shared_ptr<op::PerfCountEndBase>& n,
-                                                   const std::vector<PortConnectorPtr>& inputs,
-                                                   const LinearIR& linear_ir);
-    static ExpressionPtr create_without_connections(const std::shared_ptr<ov::Node>& n, const LinearIR& linear_ir);
+    static ExpressionPtr create(const std::shared_ptr<op::PerfCountBeginBase>& n, const std::vector<PortConnectorPtr>& inputs,
+                                const std::shared_ptr<IShapeInferSnippetsFactory>& shape_infer_factory);
+    static ExpressionPtr create(const std::shared_ptr<op::PerfCountEndBase>& n, const std::vector<PortConnectorPtr>& inputs,
+                                const std::shared_ptr<IShapeInferSnippetsFactory>& shape_infer_factory);
+    static ExpressionPtr create_without_connections(const std::shared_ptr<ov::Node>& n, const std::shared_ptr<IShapeInferSnippetsFactory>& shape_infer_factory);
 #endif
 
-    // Creates inputs for expression using parent output port connectors
-    static void create_expression_inputs(const LinearIR& linear_ir, const ExpressionPtr& expr);
+    template <typename T = Expression, typename... Args,
+              typename std::enable_if<std::is_base_of<Expression, T>::value, bool>::type = true>
+    static std::shared_ptr<T> create(const std::shared_ptr<ov::Node>& n, const std::vector<PortConnectorPtr>& inputs,
+                                     const std::shared_ptr<IShapeInferSnippetsFactory>& shape_infer_factory, Args... args) {
+        auto expr = std::shared_ptr<T>(new T(n, shape_infer_factory, args...));
+        init_expression_inputs(expr, inputs);
+        create_expression_outputs(expr);
+        expr->validate();
+        // todo: here we blindly synchronize input shapes from parent and child. Remove this when shapes will be stored in port connector itself
+        if (shape_infer_factory)
+            expr->updateShapes();
+        return expr;
+    }
+
     // Creates new output port connectors
     static void create_expression_outputs(const ExpressionPtr& expr);
     // The method verifies of input port connectors to availability of the expression as consumer and add it if missed
     static void init_expression_inputs(const ExpressionPtr& expr, const std::vector<PortConnectorPtr>& inputs);
+
+    const std::shared_ptr<IShapeInferSnippetsFactory> m_shape_infer_factory = nullptr;
 };
+using ExpressionFactoryPtr = std::shared_ptr<ExpressionFactory>;
+
+template<>
+std::shared_ptr<Expression> ExpressionFactory::build(const std::shared_ptr<Node>& n, const std::vector<PortConnectorPtr>& inputs);
 
 } // namespace lowered
 } // namespace snippets

diff --git a/src/common/snippets/include/snippets/lowered/expressions/buffer_expression.hpp b/src/common/snippets/include/snippets/lowered/expressions/buffer_expression.hpp
@@ -0,0 +1,69 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "snippets/lowered/expression.hpp"
+
+#include "snippets/utils/utils.hpp"
+
+
+namespace ov {
+namespace snippets {
+namespace lowered {
+
+// To avoid a cyclic dependency of includes, we forward-declare LoopManager
+class LoopManager;
+/**
+ * @interface BufferExpression
+ * @brief This is a base class for memory storage.
+ *        Note that Buffer should be a single consumer for operation output port
+ * @param m_allocation_size - memory size for allocation in bytes. Dynamic value means undefined size.
+ * @param m_offset - offset in common Buffer scratchpad
+ * @param m_reg_group - number of the register group. Buffers from the same register group will have the same GPR
+ * @param m_cluster_id - number of the cluster. Buffers from the same cluster share memory between them and will have the same offset.
+ * @ingroup snippets
+ */
+class BufferExpression : public Expression {
+    friend class ExpressionFactory;
+public:
+    OPENVINO_RTTI("BufferExpression", "0", Expression)
+    BufferExpression() = default;
+
+    bool visit_attributes(AttributeVisitor &visitor) override;
+
+    size_t get_reg_group() const { return m_reg_group; }
+    size_t get_cluster_id() const { return m_cluster_id; }
+    size_t get_offset() const { return m_offset; }
+    size_t get_allocation_size() const { return m_allocation_size; }
+    size_t get_byte_size() const;
+
+    void set_reg_group(size_t reg_group) { m_reg_group = reg_group; }
+    void set_cluster_id(size_t cluster) { m_cluster_id = cluster; }
+    void set_allocation_size(size_t size) { m_allocation_size = size; }
+    void set_offset(size_t offset) { m_offset = offset; }
+
+    virtual void init_allocation_size(const std::shared_ptr<LoopManager>& loop_manager, size_t allocation_rank);
+
+    // Returns True, if allocation size is known. Otherwise returns False - allocation size is undefined
+    bool is_defined() const;
+
+    // Returns True, if the memory is independent - expression doesn't have parents (source)
+    bool is_independent_memory() const { return get_input_count() == 0; }
+
+protected:
+    BufferExpression(const std::shared_ptr<Node>& n, const std::shared_ptr<IShapeInferSnippetsFactory>& factory);
+
+    ExpressionPtr clone() const override;
+
+    size_t m_allocation_size = utils::get_dynamic_value<size_t>();
+    size_t m_reg_group = 0;
+    size_t m_cluster_id = 0;
+    size_t m_offset = utils::get_dynamic_value<size_t>();
+};
+using BufferExpressionPtr = std::shared_ptr<BufferExpression>;
+
+} // namespace lowered
+} // namespace snippets
+} // namespace ov
diff --git a/src/common/snippets/include/snippets/lowered/linear_ir.hpp b/src/common/snippets/include/snippets/lowered/linear_ir.hpp
@@ -7,6 +7,8 @@
 #include <list>
 
 #include "snippets/lowered/expression.hpp"
+#include "snippets/lowered/expression_factory.hpp"
+#include "snippets/lowered/expressions/buffer_expression.hpp"
 #include "snippets/target_machine.hpp"
 #include "snippets/shape_inference/shape_inference.hpp"
 #ifdef SNIPPETS_DEBUG_CAPS
@@ -51,7 +53,6 @@ using LoopManagerPtr = std::shared_ptr<LoopManager>;
  */
 class LinearIR {
     friend class LinearIRBuilder;
-    class ExpressionFactory;
 public:
     using container = std::list<ExpressionPtr>;
     using exprIt = container::iterator;
@@ -62,12 +63,12 @@ class LinearIR {
     LinearIR(Config config = {}, const std::shared_ptr<IShapeInferSnippetsFactory>& factory = {});
     LinearIR(const std::shared_ptr<ov::Model>& m, const std::shared_ptr<IShapeInferSnippetsFactory>& factory, Config config = {});
 
-    ExpressionPtr create_expression(const std::shared_ptr<Node>& n, const std::vector<PortConnectorPtr>& inputs) const;
+    const ExpressionFactoryPtr& get_expr_factory() const;
 
     const container& get_ops() const { return m_expressions; }
-    const container& get_buffers() const { return m_buffer_expressions; }
-    const container& get_parameters() const { return m_parameter_expressions; }
-    const container& get_results() const { return m_result_expressions; }
+    const std::vector<ExpressionPtr>& get_parameters() const { return m_parameter_expressions; }
+    const std::vector<ExpressionPtr>& get_results() const { return m_result_expressions; }
+    const std::vector<BufferExpressionPtr>& get_buffers() const { return m_buffer_expressions; }
     const Config& get_config() const { return m_config; }
     size_t get_static_buffer_scratchpad_size() const { return m_static_buffer_scratchpad_size; }
 
@@ -186,6 +187,20 @@ class LinearIR {
         return std::make_pair(expr_it, node);
     }
 
+    /**
+     * @brief Inserts a new Expression into LinearIR: sets `loop_ids` as its loop identifiers and inserts the expression before `place` in LinearIR.
+     *        Also connects output ports to `consumers`
+     * @param new_expr the target expr which was created by ExpressionFactory
+     * @param loop_ids vector of loop ids that will be set for the expression
+     * @param update_loop_ports true - the helper updates the corresponding loop ports after insertion, otherwise - skip
+     * @param place before this place expression will be inserted
+     * @param consumers vector of expression port sets. These expression ports will be consumers of the expression.
+     *        The vector may be empty or size of vector must be equal to output port count
+     * @return new expression iterator in LinearIR
+     */
+    exprIt insert_expr(const ExpressionPtr& new_expr, const std::vector<size_t>& loop_ids,
+                       bool update_loop_ports, const constExprIt& place, const std::vector<std::set<ExpressionPort>>& consumers);
+
     /**
      * @brief Replace the several existing expressions with the one new expression that contains `new_node`.
      *        Calls the helper `insert_node` and performs substitution: removes `old_exprs`.
@@ -248,21 +263,22 @@ class LinearIR {
 private:
     class LIRShapeInfer : public ShapeInferSnippetsNode {
     public:
-        explicit LIRShapeInfer(const container& body_exprs, const container& param_exprs, const container& result_exprs);
+        explicit LIRShapeInfer(const container& body_exprs, const std::vector<ExpressionPtr>& param_exprs, const std::vector<ExpressionPtr>& result_exprs);
         Result infer(const std::vector<VectorDimsRef>& input_shapes) override;
 
     private:
         const container& m_exprs;
-        const container& m_input_exprs;
-        const container& m_output_exprs;
+        const std::vector<ExpressionPtr>& m_input_exprs;
+        const std::vector<ExpressionPtr>& m_output_exprs;
     };
 
     static ov::NodeVector get_ordered_ops(const std::shared_ptr<ov::Model>& model);
-    // Default way: expr port connectors are constructed basing on ov::Node connection
-    ExpressionPtr create_expression(const std::shared_ptr<Node>& n);
     ExpressionPtr create_expression(const std::shared_ptr<Node>& n, const std::vector<PortConnectorPtr>& new_inputs,
                                     const std::vector<size_t>& loop_ids, bool update_loop_ports, const std::vector<std::set<ExpressionPort>>& consumers = {});
 
+     // Creates inputs for expression using parent output port connectors
+    std::vector<PortConnectorPtr> get_expression_inputs_by_node(const std::shared_ptr<Node>& n) const;
+
     void register_expression(const ExpressionPtr& expr, bool io_allowed, double exec_num);
     void unregister_expression(const ExpressionPtr& expr);
 
@@ -271,13 +287,16 @@ class LinearIR {
 
     container m_expressions{};
     std::unordered_map<std::shared_ptr<Node>, std::shared_ptr<Expression>> m_node2expression_map;
-    container m_parameter_expressions{};
-    container m_result_expressions{};
-    container m_buffer_expressions{};
+    // Note: Parameters and Results are stored in the order of Subgraph inputs/outputs
+    std::vector<ExpressionPtr> m_parameter_expressions{};
+    std::vector<ExpressionPtr> m_result_expressions{};
+    // Note: BufferExpressions are not stored in the order of execution numbers
+    std::vector<BufferExpressionPtr> m_buffer_expressions{};
     Config m_config{};
     LoopManagerPtr m_loop_manager;
-    std::shared_ptr<IShapeInferSnippetsFactory> m_shape_infer_factory;
+    std::shared_ptr<IShapeInferSnippetsFactory> m_shape_infer_factory = nullptr;
     std::shared_ptr<ShapeInferSnippetsNode> m_shape_infer = nullptr;
+    std::shared_ptr<ExpressionFactory> m_expression_factory = nullptr;
     bool m_is_dynamic = false;
 
     // Size of static Buffer Scratchpad (Buffers with defined allocation size)

diff --git a/src/common/snippets/include/snippets/lowered/pass/compute_buffer_allocation_size.hpp b/src/common/snippets/include/snippets/lowered/pass/compute_buffer_allocation_size.hpp
@@ -22,14 +22,9 @@ namespace pass {
 class ComputeBufferAllocationSize : public RangedPass {
 public:
     OPENVINO_RTTI("ComputeBufferAllocationSize", "RangedPass")
-    ComputeBufferAllocationSize(size_t buffer_allocation_rank) : m_buffer_allocation_rank(buffer_allocation_rank) {}
+    ComputeBufferAllocationSize() = default;
 
     bool run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) override;
-
-    static size_t get_allocation_size(const LoopManagerPtr& loop_manager, const ExpressionPtr& buffer_expr, size_t allocation_rank);
-
-private:
-    const size_t m_buffer_allocation_rank = 0;
 };
 
 } // namespace pass