From 4f0d37db2c66c487c97959d7182694f9065d8802 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Mon, 2 Sep 2024 11:30:07 +0400 Subject: [PATCH] [Snippets] Created BufferExpression [Snippets][CPU] Implemented BrgemmCopyB specific buffers [Snippets] Fixed build --- .../include/snippets/lowered/expression.hpp | 38 ++++- .../snippets/lowered/expression_factory.hpp | 85 ++++++----- .../lowered/expressions/buffer_expression.hpp | 68 +++++++++ .../include/snippets/lowered/linear_ir.hpp | 37 ++++- .../pass/compute_buffer_allocation_size.hpp | 7 +- .../lowered/pass/define_buffer_clusters.hpp | 17 ++- .../lowered/pass/propagate_buffer_offset.hpp | 2 +- .../lowered/pass/set_buffer_reg_group.hpp | 14 +- .../lowered/pass/solve_buffer_memory.hpp | 9 +- .../lowered/pass/validate_buffers.hpp | 29 ++++ .../snippets/include/snippets/op/buffer.hpp | 79 ++-------- .../include/snippets/runtime_configurator.hpp | 2 +- src/common/snippets/src/generator.cpp | 5 +- .../snippets/src/lowered/expression.cpp | 133 +++++++++++++--- .../src/lowered/expression_factory.cpp | 113 ++++++-------- .../lowered/expressions/buffer_expression.cpp | 143 ++++++++++++++++++ src/common/snippets/src/lowered/linear_ir.cpp | 64 ++++++-- .../src/lowered/pass/allocate_buffers.cpp | 2 +- .../src/lowered/pass/assign_registers.cpp | 29 ++-- .../pass/clean_repeated_ptr_shifts.cpp | 16 +- .../pass/compute_buffer_allocation_size.cpp | 108 +------------ .../lowered/pass/define_buffer_clusters.cpp | 63 ++++---- .../src/lowered/pass/init_buffers_default.cpp | 24 ++- .../snippets/src/lowered/pass/init_loops.cpp | 4 +- .../src/lowered/pass/insert_buffers.cpp | 4 +- .../src/lowered/pass/insert_load_store.cpp | 4 +- .../pass/insert_specific_iterations.cpp | 20 ++- .../pass/normalize_buffer_reg_groups.cpp | 16 +- .../lowered/pass/propagate_buffer_offset.cpp | 34 ++--- .../src/lowered/pass/set_buffer_reg_group.cpp | 74 ++++----- .../src/lowered/pass/solve_buffer_memory.cpp | 52 +++---- 
.../snippets/src/lowered/pass/validate.cpp | 39 +---- .../src/lowered/pass/validate_buffers.cpp | 68 +++++++++ src/common/snippets/src/op/buffer.cpp | 124 ++++++++------- .../snippets/src/op/serialization_node.cpp | 78 +--------- src/common/snippets/src/op/subgraph.cpp | 2 + .../snippets/src/runtime_configurator.cpp | 14 +- .../src/shape_inference/shape_inference.cpp | 3 +- .../src/lowered/pass/buffer_allocation.cpp | 12 +- .../snippets/tests/src/lowering_utils.cpp | 3 +- .../snippets/aarch64/jit_kernel_emitter.cpp | 24 +-- .../emitters/snippets/x64/cpu_generator.cpp | 3 +- .../snippets/x64/jit_brgemm_emitter.cpp | 2 +- .../snippets/x64/jit_kernel_emitter.cpp | 24 +-- .../snippets/x64/jit_memory_emitters.cpp | 4 +- src/plugins/intel_cpu/src/extension.cpp | 3 +- src/plugins/intel_cpu/src/nodes/subgraph.cpp | 7 +- .../snippets/x64/op/brgemm_cpu.cpp | 2 +- .../snippets/x64/op/brgemm_utils.cpp | 40 ----- .../snippets/x64/op/brgemm_utils.hpp | 12 -- .../x64/pass/brgemm_to_brgemm_cpu.cpp | 2 +- .../x64/pass/lowered/brgemm_cpu_blocking.cpp | 4 +- .../lowered/insert_brgemm_copy_b_buffers.cpp | 140 +++++++++++++++++ .../lowered/insert_brgemm_copy_b_buffers.hpp | 65 ++++++++ .../set_brgemm_copy_b_buffers_shape.cpp | 43 ------ .../set_brgemm_copy_b_buffers_shape.hpp | 31 ---- .../x64/lowered/brgemm_blocking.cpp | 4 +- .../x64/lowered/buffer_allocation.cpp | 16 +- 58 files changed, 1156 insertions(+), 908 deletions(-) create mode 100644 src/common/snippets/include/snippets/lowered/expressions/buffer_expression.hpp create mode 100644 src/common/snippets/include/snippets/lowered/pass/validate_buffers.hpp create mode 100644 src/common/snippets/src/lowered/expressions/buffer_expression.cpp create mode 100644 src/common/snippets/src/lowered/pass/validate_buffers.cpp create mode 100644 src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/insert_brgemm_copy_b_buffers.cpp create mode 100644 
src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/insert_brgemm_copy_b_buffers.hpp delete mode 100644 src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/set_brgemm_copy_b_buffers_shape.cpp delete mode 100644 src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/set_brgemm_copy_b_buffers_shape.hpp diff --git a/src/common/snippets/include/snippets/lowered/expression.hpp b/src/common/snippets/include/snippets/lowered/expression.hpp index a04368e5605435..befbaeb3c526d6 100644 --- a/src/common/snippets/include/snippets/lowered/expression.hpp +++ b/src/common/snippets/include/snippets/lowered/expression.hpp @@ -17,15 +17,18 @@ namespace ov { namespace snippets { namespace lowered { +class ExpressionFactory; class LinearIR; using ExpressionPtr = std::shared_ptr; using ExpressionMap = std::unordered_map; class Expression : public std::enable_shared_from_this { friend class LinearIR; + friend class ExpressionFactory; friend class ExpressionPort; public: Expression() = default; + virtual ~Expression() = default; std::shared_ptr get_node() const; std::shared_ptr get_emitter() const; @@ -50,7 +53,8 @@ class Expression : public std::enable_shared_from_this { void set_input_port_connector(size_t port, PortConnectorPtr to); - void validate() const; + // Cannot be called in ctor because validate port attributes (descs, connectors) also + virtual void validate() const; ExpressionPort get_input_port(size_t i); ExpressionPort get_output_port(size_t i); @@ -61,16 +65,42 @@ class Expression : public std::enable_shared_from_this { bool needShapeInfer() const { return m_need_shape_infer; } const std::vector& get_loop_ids() const; void set_loop_ids(const std::vector& loops); - ExpressionPtr clone_with_new_inputs(const std::vector& new_inputs, - const std::shared_ptr& new_node) const; + ExpressionPtr clone_with_new_inputs(const std::shared_ptr& new_node, const std::vector& new_inputs, + const std::vector& new_in_descs = {}) const; 
ExpressionPtr clone_with_new_inputs(const ExpressionMap& expr_map, const std::shared_ptr& new_node) const; + virtual bool visit_attributes(AttributeVisitor &visitor); + + // Note that get_type_info_static and get_type_info are needed to mimic OPENVINO_RTTI interface, + // so the standard OPENVINO_RTTI(...) macros could be used in derived classes. + _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() { + static ::ov::DiscreteTypeInfo type_info_static {"Expression"}; + type_info_static.hash(); + return type_info_static; + } + + virtual const DiscreteTypeInfo& get_type_info() const { + return get_type_info_static(); + } + + const char* get_type_name() const { + return get_type_info().name; + } + protected: Expression(const Expression& other); // Note: The constructor initialization is private since an expression can be created only by Linear IR. // The method must be used only by Linear IR builder of expressions! Expression(const std::shared_ptr& n, const std::shared_ptr& factory, bool need_shape_infer = true); - void update_node_and_connectors(const std::vector& new_inputs, const std::shared_ptr& new_node); + + // Virtual clone method wich is called in clone_with_new_inputs with common logic + virtual ExpressionPtr clone() const; + // Called in ctors to validate expression attributes + virtual void validate_attributes() const; + + // used in clone_with_new_inputs. 
New output port descriptors were inited automatically + void update_port_attributes(const std::shared_ptr& new_node, const std::vector& new_inputs, + const std::vector& new_in_descs, const std::vector& new_out_descs); std::shared_ptr m_source_node{nullptr}; std::shared_ptr m_emitter{nullptr}; diff --git a/src/common/snippets/include/snippets/lowered/expression_factory.hpp b/src/common/snippets/include/snippets/lowered/expression_factory.hpp index ca45fe936e0500..d617eb3d03b410 100644 --- a/src/common/snippets/include/snippets/lowered/expression_factory.hpp +++ b/src/common/snippets/include/snippets/lowered/expression_factory.hpp @@ -4,65 +4,72 @@ #pragma once -#include "linear_ir.hpp" +#include "expression.hpp" +#include "expressions/buffer_expression.hpp" -#include "snippets/snippets_isa.hpp" +#include "snippets/op/loop.hpp" +#include "snippets/op/buffer.hpp" +#include "snippets/op/perf_count.hpp" namespace ov { namespace snippets { namespace lowered { -class LinearIR::ExpressionFactory { +class ExpressionFactory { public: - template - static ExpressionPtr build(const std::shared_ptr& n, Args&&... 
params) { - if (const auto par = ov::as_type_ptr(n)) { - return create(par, params...); - } else if (const auto res = ov::as_type_ptr(n)) { - return create(res, params...); - } else if (const auto loop_begin = ov::as_type_ptr(n)) { - return create(loop_begin, params...); - } else if (const auto loop_end = ov::as_type_ptr(n)) { - return create(loop_end, params...); -#ifdef SNIPPETS_DEBUG_CAPS - } else if (const auto perf_counter = ov::as_type_ptr(n)) { - return create(perf_counter, params...); - } else if (const auto perf_counter = ov::as_type_ptr(n)) { - return create(perf_counter, params...); -#endif - } - return create(n, params...); + ExpressionFactory(std::shared_ptr shape_infer_factory) + : m_shape_infer_factory(std::move(shape_infer_factory)) {} + + template ::value, bool>::type = true> + std::shared_ptr build(const std::shared_ptr& n, const std::vector& inputs, Args... args) { + return create(n, inputs, m_shape_infer_factory, args...); } private: - /* -- Default Builders - initialize input port connectors from parents and create new output port connectors themselves */ - static ExpressionPtr create(const std::shared_ptr& par, const LinearIR& linear_ir); - static ExpressionPtr create(const std::shared_ptr& res, const LinearIR& linear_ir); - static ExpressionPtr create(const std::shared_ptr& n, const LinearIR& linear_ir); - - /* -- Input Builders - get input port connectors from method parameters and create new output port connectors themselves */ - static ExpressionPtr create(const std::shared_ptr& n, const std::vector& inputs, const LinearIR& linear_ir); - static ExpressionPtr create(const std::shared_ptr& n, const std::vector& inputs, const LinearIR& linear_ir); - static ExpressionPtr create(const std::shared_ptr& n, const std::vector& inputs, const LinearIR& linear_ir); + static ExpressionPtr create(const std::shared_ptr& par, const std::vector& inputs, + const std::shared_ptr& shape_infer_factory); + static ExpressionPtr create(const std::shared_ptr& res, 
const std::vector& inputs, + const std::shared_ptr& shape_infer_factory); + static ExpressionPtr create(const std::shared_ptr& n, const std::vector& inputs, + const std::shared_ptr& shape_infer_factory); + static ExpressionPtr create(const std::shared_ptr& n, const std::vector& inputs, + const std::shared_ptr& shape_infer_factory); // Note: PerfCountBegin nodes have a PerfCountEnd ov::Output, but corresponding expression should not have any outputs to avoid register allocation #ifdef SNIPPETS_DEBUG_CAPS - static ExpressionPtr create(const std::shared_ptr& n, - const std::vector& inputs, - const LinearIR& linear_ir); - static ExpressionPtr create(const std::shared_ptr& n, - const std::vector& inputs, - const LinearIR& linear_ir); - static ExpressionPtr create_without_connections(const std::shared_ptr& n, const LinearIR& linear_ir); + static ExpressionPtr create(const std::shared_ptr& n, const std::vector& inputs, + const std::shared_ptr& shape_infer_factory); + static ExpressionPtr create(const std::shared_ptr& n, const std::vector& inputs, + const std::shared_ptr& shape_infer_factory); + static ExpressionPtr create_without_connections(const std::shared_ptr& n, const std::shared_ptr& shape_infer_factory); #endif - // Creates inputs for expression using parent output port connectors - static void create_expression_inputs(const LinearIR& linear_ir, const ExpressionPtr& expr); + template ::value, bool>::type = true> + static std::shared_ptr create(const std::shared_ptr& n, const std::vector& inputs, + const std::shared_ptr& shape_infer_factory, Args... args) { + auto expr = std::shared_ptr(new T(n, shape_infer_factory, args...)); + init_expression_inputs(expr, inputs); + create_expression_outputs(expr); + expr->validate(); + // todo: here we blindly synchronize input shapes from parent and child. 
Remove this when shapes will be stored in port connector itself + if (shape_infer_factory) + expr->updateShapes(); + return expr; + } + // Creates new output port connectors static void create_expression_outputs(const ExpressionPtr& expr); // The method verifies of input port connectors to availability of the expression as consumer and add it if missed static void init_expression_inputs(const ExpressionPtr& expr, const std::vector& inputs); + + const std::shared_ptr m_shape_infer_factory = nullptr; }; +using ExpressionFactoryPtr = std::shared_ptr; + +template<> +std::shared_ptr ExpressionFactory::build(const std::shared_ptr& n, const std::vector& inputs); } // namespace lowered } // namespace snippets diff --git a/src/common/snippets/include/snippets/lowered/expressions/buffer_expression.hpp b/src/common/snippets/include/snippets/lowered/expressions/buffer_expression.hpp new file mode 100644 index 00000000000000..94fdf1c8dcdc1a --- /dev/null +++ b/src/common/snippets/include/snippets/lowered/expressions/buffer_expression.hpp @@ -0,0 +1,68 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "snippets/lowered/expression.hpp" + +#include "snippets/utils/utils.hpp" + + +namespace ov { +namespace snippets { +namespace lowered { + +// To avoid cycle-dependancy of includes, we forward-declare LoopManager +class LoopManager; +/** + * @interface BufferExpression + * @brief This is a base class for memory storage. + * Notes that Buffer should be a single consumer for operation output port + * @param m_allocation_size - memory size for allocation in bytes. Dynamic value means undefined size. + * @param m_offset - offset in common Buffer scratchpad + * @param m_reg_group - number of register group. The Buffers from the same register group will have the same GPR + * @param m_cluster_id - number of cluster. The Buffers from the same cluster shares memory between them and will have the same offset. 
+ * @ingroup snippets + */ +class BufferExpression : public Expression { + friend class ExpressionFactory; +public: + OPENVINO_RTTI("BufferExpression", "0", Expression) + BufferExpression() = default; + + bool visit_attributes(AttributeVisitor &visitor) override; + + size_t get_reg_group() const { return m_reg_group; } + size_t get_cluster_id() const { return m_cluster_id; } + size_t get_offset() const { return m_offset; } + size_t get_allocation_size() const { return m_allocation_size; } + size_t get_byte_size() const; + + void set_reg_group(size_t reg_group) { m_reg_group = reg_group; } + void set_cluster_id(size_t cluster) { m_cluster_id = cluster; } + void set_allocation_size(size_t size) { m_allocation_size = size; } + void set_offset(size_t offset) { m_offset = offset; } + + virtual void init_allocation_size(const std::shared_ptr& loop_manager, size_t allocation_rank); + + // Returns True, if allocation size is known. Otherwise returns False - allocation size is undefined + bool is_defined() const; + +protected: + BufferExpression(const BufferExpression& other); + BufferExpression(const std::shared_ptr& n, const std::shared_ptr& factory); + + ExpressionPtr clone() const override; + void validate_attributes() const override; + + size_t m_allocation_size = utils::get_dynamic_value(); + size_t m_reg_group = 0; + size_t m_cluster_id = 0; + size_t m_offset = utils::get_dynamic_value(); +}; +using BufferExpressionPtr = std::shared_ptr; + +} // namespace lowered +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/lowered/linear_ir.hpp b/src/common/snippets/include/snippets/lowered/linear_ir.hpp index 55afd2c9ccd7ab..6038b608a76ff7 100644 --- a/src/common/snippets/include/snippets/lowered/linear_ir.hpp +++ b/src/common/snippets/include/snippets/lowered/linear_ir.hpp @@ -7,6 +7,8 @@ #include #include "snippets/lowered/expression.hpp" +#include "snippets/lowered/expression_factory.hpp" +#include 
"snippets/lowered/expressions/buffer_expression.hpp" #include "snippets/target_machine.hpp" #include "snippets/shape_inference/shape_inference.hpp" #ifdef SNIPPETS_DEBUG_CAPS @@ -51,9 +53,12 @@ using LoopManagerPtr = std::shared_ptr; */ class LinearIR { friend class LinearIRBuilder; - class ExpressionFactory; public: - using container = std::list; + template ::value, bool>::type = true> + using containerT = std::list>; + using container = containerT; + using buffers = containerT; using exprIt = container::iterator; using constExprIt = container::const_iterator; using exprReverseIt = container::reverse_iterator; @@ -62,12 +67,12 @@ class LinearIR { LinearIR(Config config = {}, const std::shared_ptr& factory = {}); LinearIR(const std::shared_ptr& m, const std::shared_ptr& factory, Config config = {}); - ExpressionPtr create_expression(const std::shared_ptr& n, const std::vector& inputs) const; + const ExpressionFactoryPtr& get_expr_factory() const; const container& get_ops() const { return m_expressions; } - const container& get_buffers() const { return m_buffer_expressions; } const container& get_parameters() const { return m_parameter_expressions; } const container& get_results() const { return m_result_expressions; } + const buffers& get_buffers() const { return m_buffer_expressions; } const Config& get_config() const { return m_config; } size_t get_static_buffer_scratchpad_size() const { return m_static_buffer_scratchpad_size; } @@ -186,6 +191,20 @@ class LinearIR { return std::make_pair(expr_it, node); } + /** + * @brief Insert new Expression to LinearIR, sets `loops_ids` as loop identifiers and inserts the expression on the `place` in LinearIR. 
+ * Also connects output ports to `consumers` + * @param new_expr the target expr which were created by ExpressionFactory + * @param loop_ids vector of loops ids that will be set for the expression + * @param update_loop_ports true - the helpers updates the corresponding loop ports after insertion otherwise - skip + * @param place before this place expression will be inserted + * @param consumers vector of expression port sets. These expression ports will be consumers of the expression. + * The vector may be empty or size of vector must be equal to output port count + * @return new expression iterator in LinearIR + */ + exprIt insert_expr(const ExpressionPtr& new_expr, const std::vector& loop_ids, + bool update_loop_ports, const constExprIt& place, const std::vector>& consumers); + /** * @brief Replace the several existing expressions with the one new expression that contains `new_node`. * Calls the helper `insert_node` and performs substitution: removes `old_exprs`. @@ -258,11 +277,12 @@ class LinearIR { }; static ov::NodeVector get_ordered_ops(const std::shared_ptr& model); - // Default way: expr port connectors are constructed basing on ov::Node connection - ExpressionPtr create_expression(const std::shared_ptr& n); ExpressionPtr create_expression(const std::shared_ptr& n, const std::vector& new_inputs, const std::vector& loop_ids, bool update_loop_ports, const std::vector>& consumers = {}); + // Creates inputs for expression using parent output port connectors + std::vector get_expression_inputs_by_node(const std::shared_ptr& n) const; + void register_expression(const ExpressionPtr& expr, bool io_allowed, double exec_num); void unregister_expression(const ExpressionPtr& expr); @@ -273,11 +293,12 @@ class LinearIR { std::unordered_map, std::shared_ptr> m_node2expression_map; container m_parameter_expressions{}; container m_result_expressions{}; - container m_buffer_expressions{}; + buffers m_buffer_expressions{}; Config m_config{}; LoopManagerPtr m_loop_manager; 
- std::shared_ptr m_shape_infer_factory; + std::shared_ptr m_shape_infer_factory = nullptr; std::shared_ptr m_shape_infer = nullptr; + std::shared_ptr m_expression_factory = nullptr; bool m_is_dynamic = false; // Size of static Buffer Scratchpad (Buffers with defined allocation size) diff --git a/src/common/snippets/include/snippets/lowered/pass/compute_buffer_allocation_size.hpp b/src/common/snippets/include/snippets/lowered/pass/compute_buffer_allocation_size.hpp index 830956338ef4a1..01d8b3ee85261e 100644 --- a/src/common/snippets/include/snippets/lowered/pass/compute_buffer_allocation_size.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/compute_buffer_allocation_size.hpp @@ -22,14 +22,9 @@ namespace pass { class ComputeBufferAllocationSize : public RangedPass { public: OPENVINO_RTTI("ComputeBufferAllocationSize", "RangedPass") - ComputeBufferAllocationSize(size_t buffer_allocation_rank) : m_buffer_allocation_rank(buffer_allocation_rank) {} + ComputeBufferAllocationSize() = default; bool run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) override; - - static size_t get_allocation_size(const LoopManagerPtr& loop_manager, const ExpressionPtr& buffer_expr, size_t allocation_rank); - -private: - const size_t m_buffer_allocation_rank = 0; }; } // namespace pass diff --git a/src/common/snippets/include/snippets/lowered/pass/define_buffer_clusters.hpp b/src/common/snippets/include/snippets/lowered/pass/define_buffer_clusters.hpp index 824b0d4daea75d..1597eaa2377a50 100644 --- a/src/common/snippets/include/snippets/lowered/pass/define_buffer_clusters.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/define_buffer_clusters.hpp @@ -43,27 +43,27 @@ class DefineBufferClusters : public RangedPass { bool run(lowered::LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) override; private: - using BufferCluster = std::set; + using BufferCluster = std::set; using 
BufferClusters = std::vector; - using BufferPorts = std::unordered_map>; + using BufferPorts = std::unordered_map>; /** * @brief Finds Buffer cluster in set of clusters which contains the target expression with Buffer * @param target target expression with Buffer op * @return vector iterator which refers to the found cluster */ - BufferClusters::iterator find_cluster_by_expr(const ExpressionPtr& target); + BufferClusters::iterator find_cluster_by_expr(const BufferExpressionPtr& target); /** * @brief Returns True if Buffer is direct source for the target expr (there aren't other loop between the Buffer and target expr) * @param buffer_expr expression with assumed Buffer op * @param target_expr expression with target op - LoopEnd or MemoryAccess op * @return boolean value */ - bool is_direct_buffer(const ExpressionPtr& buffer_expr, const ExpressionPtr& target_expr) const; + bool is_direct_buffer(const BufferExpressionPtr& buffer_expr, const ExpressionPtr& target_expr) const; /** * @brief Creates new buffer cluster if buffer_exprs is missed in clusters. If buffer_exprs is already in clusters, do nothing * @param buffer_expr expression with Buffer op */ - void create_new_cluster(const ExpressionPtr& buffer_expr); + void create_new_cluster(const BufferExpressionPtr& buffer_expr); /** * @brief Returns common ID of cluster if all buffer inside have the same Buffer ID. Otherwise returns the default value SIZE_MAX * that means that Buffers in cluster have different IDs. @@ -106,7 +106,7 @@ class DefineBufferClusters : public RangedPass { * @param buffer_expr expression with Buffer op * @return finalization offset - int64_t value */ - int64_t get_buffer_finalization_offset(const ExpressionPtr& buffer_expr) const; + int64_t get_buffer_finalization_offset(const BufferExpressionPtr& buffer_expr) const; /** * @brief Check if two Buffer expressions are connected to the same Loop. 
Set common LoopEnd as `loop` parameter and * indexes of Loop ports `up_idx` and `down_idx` if Buffers are really neighbours @@ -117,7 +117,8 @@ class DefineBufferClusters : public RangedPass { * @param down_idx the reference to port index of lower Buffer op to the Loop * @return Return True if the Buffers are connected to the same Loop */ - static bool are_buffer_neighbours(const ExpressionPtr& up, const ExpressionPtr& down, ExpressionPtr& loop, size_t& up_idx, size_t& down_idx); + static bool are_buffer_neighbours(const BufferExpressionPtr& up, const BufferExpressionPtr& down, ExpressionPtr& loop, + size_t& up_idx, size_t& down_idx); /** * @brief Unite clusters * @param inner_cluster_it iterator to inner cluster - buffer cluster is in the loop @@ -127,7 +128,7 @@ class DefineBufferClusters : public RangedPass { * @return Return True if clusters have been united */ bool unite_nested_clusters(const BufferClusters::iterator& inner_cluster_it, BufferCluster& outer_cluster, - const ExpressionPtr& outer_buffer, bool is_outer_up); + const BufferExpressionPtr& outer_buffer, bool is_outer_up); BufferClusters m_clusters; }; diff --git a/src/common/snippets/include/snippets/lowered/pass/propagate_buffer_offset.hpp b/src/common/snippets/include/snippets/lowered/pass/propagate_buffer_offset.hpp index a602569d793a55..d895b3a60cd26d 100644 --- a/src/common/snippets/include/snippets/lowered/pass/propagate_buffer_offset.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/propagate_buffer_offset.hpp @@ -34,7 +34,7 @@ class PropagateBufferOffset: public Pass { * @brief Propagates Buffer offset to the connected memory access ops * @param buffer_expr expression with Buffer op with inited offset */ - static void propagate(const ExpressionPtr& buffer_expr); + static void propagate(const BufferExpressionPtr& buffer_expr); }; } // namespace pass diff --git a/src/common/snippets/include/snippets/lowered/pass/set_buffer_reg_group.hpp 
b/src/common/snippets/include/snippets/lowered/pass/set_buffer_reg_group.hpp index 8faf2419a0a313..674e8e9964ac2c 100644 --- a/src/common/snippets/include/snippets/lowered/pass/set_buffer_reg_group.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/set_buffer_reg_group.hpp @@ -64,8 +64,8 @@ class SetBufferRegGroup: public RangedPass { static bool can_be_in_one_group(const ShiftPtrParams& lhs, const ShiftPtrParams& rhs); private: - using BufferPool = std::vector; - using BufferMap = std::map; + using BufferPool = LinearIR::buffers; + using BufferMap = std::map; /** * @brief Get Buffer Index in Buffer set @@ -73,7 +73,7 @@ class SetBufferRegGroup: public RangedPass { * @param pool set of Buffers from the Linear IR * @return index of target Buffer expression in set */ - static size_t get_buffer_idx(const ExpressionPtr& target, const BufferPool& pool); + static size_t get_buffer_idx(const BufferExpressionPtr& target, const BufferPool& pool); /** * @brief Create adjacency matrix for Buffer system. See comment in the method for more details. 
* @param linear_ir the target Linear IR @@ -99,8 +99,8 @@ class SetBufferRegGroup: public RangedPass { * @param buffers set of Buffers from the Linear IR * @param adj Target adjacency matrix */ - static void update_adj_matrix(const std::pair& lhs, - const std::pair& rhs, + static void update_adj_matrix(const std::pair& lhs, + const std::pair& rhs, const BufferPool& buffers, std::vector& adj); /** @@ -109,8 +109,8 @@ class SetBufferRegGroup: public RangedPass { * @param rhs Pair where first value is Expression with second Buffer and second value is data pointer shift params for it * @return Returns True if they are adjacent, otherwise returns False */ - static bool are_adjacent(const std::pair& lhs, - const std::pair& rhs); + static bool are_adjacent(const std::pair& lhs, + const std::pair& rhs); /** * @brief Find all buffers that are connected to the current LoopEnd diff --git a/src/common/snippets/include/snippets/lowered/pass/solve_buffer_memory.hpp b/src/common/snippets/include/snippets/lowered/pass/solve_buffer_memory.hpp index 74f2994deec971..c3e6564f9bdfec 100644 --- a/src/common/snippets/include/snippets/lowered/pass/solve_buffer_memory.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/solve_buffer_memory.hpp @@ -35,32 +35,33 @@ class SolveBufferMemory : public Pass { bool run(lowered::LinearIR& linear_ir) override; private: + using Buffers = LinearIR::buffers; /** * @brief Split buffer expressions of Linear IR into * static (with defined allocation size) and dynamic (with unknown size) buffers * @param buffer_expressions buffer expressions * @return the pair of static and dynamic buffer expressions */ - std::pair extract_static_and_dynamic_buffers(const LinearIR::container& buffer_expressions); + std::pair extract_static_and_dynamic_buffers(const Buffers& buffer_expressions); /** * @brief Initializes boxes for MemorySolver * @param buffer_expressions buffer expressions * @param linear_ir linear ir * @return vector of boxes for MemorySolver */ - 
std::vector init_boxes(const LinearIR::container& buffer_expressions, const LinearIR& linear_ir); + std::vector init_boxes(const Buffers& buffer_expressions, const LinearIR& linear_ir); /** * @brief Calculate memory size and set offset to buffer with defined allocation size * @param static_buffer_expressions static buffer expressions * @param linear_ir linear ir */ - void solve_static_buffer_memory(const LinearIR::container& static_buffer_expressions, const LinearIR& linear_ir); + void solve_static_buffer_memory(const Buffers& static_buffer_expressions, const LinearIR& linear_ir); /** * @brief Initialize offset for Buffer with undefined allocation size * Note: should be called after `solve_static_buffer_memory` * @param dynamic_buffer_expressions dynamic buffer expressions */ - void set_dynamic_buffer_offset(const LinearIR::container& dynamic_buffer_expressions); + void set_dynamic_buffer_offset(const Buffers& dynamic_buffer_expressions); size_t& m_static_buffer_scratchpad_size; diff --git a/src/common/snippets/include/snippets/lowered/pass/validate_buffers.hpp b/src/common/snippets/include/snippets/lowered/pass/validate_buffers.hpp new file mode 100644 index 00000000000000..b87697d054e4fb --- /dev/null +++ b/src/common/snippets/include/snippets/lowered/pass/validate_buffers.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "pass.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +/** + * @interface ValidateBuffers + * @brief The pass validates buffer expression in Linear IR state + * @ingroup snippets + */ +class ValidateBuffers : public RangedPass { +public: + OPENVINO_RTTI("ValidateBuffers", "Pass") + ValidateBuffers() = default; + bool run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) override; +}; + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov diff 
--git a/src/common/snippets/include/snippets/op/buffer.hpp b/src/common/snippets/include/snippets/op/buffer.hpp index e990a31d28b6c0..8a2f9680d9ac56 100644 --- a/src/common/snippets/include/snippets/op/buffer.hpp +++ b/src/common/snippets/include/snippets/op/buffer.hpp @@ -15,93 +15,44 @@ namespace op { /** * @interface Buffer * @brief This is a base class for memory storage. - * Notes: - * - All buffers with the same reg_group in a graph have the same memory pointer. So if we have a few buffers, - * each the corresponding MemoryAccess op for Buffer should have offset for common memory pointer of this Buffer - * - Buffer should be a single consumer for operation output port - * @param m_allocation_size - memory size for allocation in bytes. Dynamic value means undefined size. - * @param m_offset - offset in common Buffer scratchpad - * @param m_reg_group - number of register group. The Buffers from the same register group will have the same GPR - * @param m_cluster_id - number of cluster. The Buffers from the same cluster shares memory between them and will have the same offset. 
* @ingroup snippets */ class Buffer : public ov::op::Op { + enum class Type { + NewMemory, + IntermediateMemory + }; + public: OPENVINO_OP("Buffer", "SnippetsOpset"); Buffer() = default; - Buffer(const OutputVector& arguments, size_t allocation_size = utils::get_dynamic_value(), size_t reg_group = 0, size_t cluster_id = 0); + Buffer(const ov::Output& arg); + Buffer(const OutputVector& arguments); + Buffer(const ov::Shape& shape, ov::element::Type element_type = ov::element::u8); bool visit_attributes(AttributeVisitor& visitor) override; - size_t get_reg_group() const { return m_reg_group; } - size_t get_cluster_id() const { return m_cluster_id; } - size_t get_offset() const { return m_offset; } - size_t get_allocation_size() const { return m_allocation_size; } - size_t get_byte_size() const; - - void set_reg_group(size_t reg_group) { m_reg_group = reg_group; } - void set_cluster_id(size_t cluster) { m_cluster_id = cluster; } - void set_allocation_size(size_t allocation_size) { m_allocation_size = allocation_size; } - void set_offset(size_t offset) { m_offset = offset; } - - // Returns True, if allocation size is known. Otherwise returns False - allocation size is undefined - bool is_defined() const; - -protected: - size_t m_allocation_size = utils::get_dynamic_value(); - size_t m_reg_group = 0; - size_t m_cluster_id = 0; - size_t m_offset = utils::get_dynamic_value(); -}; - -/** - * @interface IntermediateMemoryBuffer - * @brief Represents an intermediate memory storage operation. It always has a parent. 
- * @ingroup snippets - * - */ -class IntermediateMemoryBuffer : public Buffer { -public: - OPENVINO_OP("IntermediateMemoryBuffer", "SnippetsOpset", Buffer); - IntermediateMemoryBuffer() = default; - IntermediateMemoryBuffer(const OutputVector& arguments, size_t allocation_size = utils::get_dynamic_value(), - size_t reg_group = 0, size_t cluster_id = 0); - IntermediateMemoryBuffer(const ov::Output& arg, size_t allocation_size = utils::get_dynamic_value(), - size_t reg_group = 0, size_t cluster_id = 0); - void validate_and_infer_types() override; - std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; -}; -/** - * @interface NewMemoryBuffer - * @brief Represents a new empty memory for allocation with specified shape. It has no parent operations. - * @ingroup snippets - * - */ -class NewMemoryBuffer : public Buffer { -public: - OPENVINO_OP("NewMemoryBuffer", "SnippetsOpset", Buffer); - NewMemoryBuffer() = default; - NewMemoryBuffer(const ov::Shape& shape, size_t reg_group = 0, size_t cluster_id = 0, ov::element::Type element_type = ov::element::u8); - - void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - void set_element_type(ov::element::Type element_type); + size_t get_allocation_size() const; class ShapeInfer : public IShapeInferSnippets { ov::Shape m_shape; + Type m_type; public: explicit ShapeInfer(const std::shared_ptr& n); Result infer(const std::vector& input_shapes) override; }; -private: - ov::Shape m_output_shape; - ov::element::Type m_element_type = ov::element::u8; // u8 - default 1 byte +protected: + const Type m_type = Type::NewMemory; + const ov::Shape m_output_shape {}; + const ov::element::Type m_element_type = ov::element::u8; // u8 - default 1 byte }; + } // namespace op } // namespace snippets } // namespace ov diff --git a/src/common/snippets/include/snippets/runtime_configurator.hpp 
b/src/common/snippets/include/snippets/runtime_configurator.hpp index 169d63ee4baa92..660871b890b49b 100644 --- a/src/common/snippets/include/snippets/runtime_configurator.hpp +++ b/src/common/snippets/include/snippets/runtime_configurator.hpp @@ -218,7 +218,7 @@ class RuntimeConfigurator { std::vector m_io_descs = {}; std::vector m_io_data_sizes = {}; // [cluster_id -> buffer expressions ] - std::map> m_dynamic_buffer_clusters = {}; + std::map> m_dynamic_buffer_clusters = {}; std::vector m_ordered_loop_ids = {}; std::vector m_latest_shapes = {}; diff --git a/src/common/snippets/src/generator.cpp b/src/common/snippets/src/generator.cpp index 7ba5e830fd3362..d76545e0a5ba40 100644 --- a/src/common/snippets/src/generator.cpp +++ b/src/common/snippets/src/generator.cpp @@ -29,7 +29,7 @@ LoweringResult Generator::generate(const lowered::LinearIRPtr& linear_ir, const const auto kernel_op = op::Kernel::make_kernel(*linear_ir); kernel_op->compile_params = compile_params; - const auto kernel_expr = linear_ir->create_expression(kernel_op, std::vector{}); + const auto kernel_expr = linear_ir->get_expr_factory()->build<>(kernel_op, std::vector{}); const auto kernel = target->get(kernel_expr->get_node()->get_type_info())(kernel_expr); kernel->emit_code({}, {}); @@ -74,8 +74,7 @@ RegType Generator::get_op_out_reg_type(const ov::Output& out) const { std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || + std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) diff --git a/src/common/snippets/src/lowered/expression.cpp b/src/common/snippets/src/lowered/expression.cpp index 3c4391da3a7250..01d301fdb86063 100644 --- a/src/common/snippets/src/lowered/expression.cpp +++ b/src/common/snippets/src/lowered/expression.cpp @@ -25,23 +25,20 @@ Expression::Expression(const std::shared_ptr& n, const 
std::shared_ptroutputs()) { m_output_port_descriptors.push_back(PortDescriptorUtils::get_port_descriptor_ptr(output)); } + validate_attributes(); } Expression::Expression(const Expression& other) : std::enable_shared_from_this(other), m_source_node(other.m_source_node), m_emitter(other.m_emitter), m_loop_ids(other.m_loop_ids), m_shapeInference(other.m_shapeInference), m_need_shape_infer(other.m_need_shape_infer), m_exec_num(other.m_exec_num) { - auto clone_ports_descriptors = [](const std::vector& src, std::vector& dst) { - dst.resize(src.size()); - for (size_t i = 0; i < src.size(); i++) - dst[i] = src[i]->clone(); - }; - clone_ports_descriptors(other.m_input_port_descriptors, m_input_port_descriptors); - clone_ports_descriptors(other.m_output_port_descriptors, m_output_port_descriptors); + m_input_port_descriptors = {}; + m_output_port_descriptors = {}; // Note that connectors are not filled on purpose, since you need a shared pointer to this to initialize them, // which is not available in constructor. Also, an expression copy is rarely expected to use the same connectors. 
m_input_port_connectors = {}; m_output_port_connectors = {}; + validate_attributes(); } const PortConnectorPtr& Expression::get_input_port_connector(size_t i) const { @@ -96,13 +93,17 @@ void Expression::set_reg_info(const RegInfo& rinfo) { } } + void Expression::validate_attributes() const { + OPENVINO_ASSERT(m_source_node != nullptr, + "The expression has null source node"); + } + void Expression::validate() const { + validate_attributes(); OPENVINO_ASSERT(m_input_port_descriptors.size() == m_input_port_connectors.size(), "The count of input ports and input port connectors must be equal"); OPENVINO_ASSERT(m_output_port_descriptors.size() == m_output_port_connectors.size(), "The count of output ports and output port connectors must be equal"); - OPENVINO_ASSERT(m_source_node != nullptr, - "The expression has null source node"); } void Expression::set_input_port_connector(size_t port, PortConnectorPtr to) { @@ -130,13 +131,12 @@ void Expression::set_loop_ids(const std::vector& loops) { m_loop_ids = loops; } -void Expression::update_node_and_connectors(const std::vector& new_inputs, - const std::shared_ptr& new_node) { - OPENVINO_ASSERT(m_source_node->get_type_info() == new_node->get_type_info(), - "Can't clone expression for a new node with incompatible type"); +void Expression::update_port_attributes(const std::shared_ptr& new_node, const std::vector& new_inputs, + const std::vector& new_in_descs, const std::vector& new_out_descs) { + OPENVINO_ASSERT(m_source_node->get_type_info() == new_node->get_type_info(), "Can't clone expression for a new node with incompatible type"); m_source_node = new_node; - OPENVINO_ASSERT(new_inputs.size() == m_input_port_descriptors.size(), - "Can't create Expression with new inputs: invalid number of input port connectors passed"); + OPENVINO_ASSERT(new_inputs.size() == new_in_descs.size(), "Can't create Expression with new inputs: invalid number of input port connectors passed"); + m_input_port_descriptors = new_in_descs; 
m_input_port_connectors = new_inputs; for (size_t i = 0; i < m_input_port_descriptors.size(); i++) { const auto& i_con = new_inputs[i]; @@ -144,16 +144,27 @@ void Expression::update_node_and_connectors(const std::vector& if (!i_con->found_consumer(i_port)) i_con->add_consumer(i_port); } + m_output_port_descriptors = new_out_descs; m_output_port_connectors.resize(m_output_port_descriptors.size()); for (size_t i = 0; i < m_output_port_descriptors.size(); i++) { m_output_port_connectors[i] = std::make_shared(get_output_port(i)); } } -ExpressionPtr Expression::clone_with_new_inputs(const std::vector& new_inputs, - const std::shared_ptr& new_node) const { - const auto& expr = std::shared_ptr(new Expression(*this)); - expr->update_node_and_connectors(new_inputs, new_node); +ExpressionPtr Expression::clone_with_new_inputs(const std::shared_ptr& new_node, + const std::vector& new_inputs, + const std::vector& new_in_descs) const { + auto clone_ports_descriptors = [](const std::vector& src) { + std::vector dst(src.size()); + for (size_t i = 0; i < src.size(); i++) + dst[i] = src[i]->clone(); + return dst; + }; + const auto& expr = clone(); + const auto& in_descs = !new_in_descs.empty() ? 
new_in_descs : clone_ports_descriptors(m_input_port_descriptors); + const auto& out_descs = clone_ports_descriptors(m_output_port_descriptors); + expr->update_port_attributes(new_node, new_inputs, in_descs, out_descs); + expr->validate(); return expr; } @@ -171,7 +182,89 @@ ExpressionPtr Expression::clone_with_new_inputs(const ExpressionMap& expr_map, new_inputs.emplace_back(input); } } - return clone_with_new_inputs(new_inputs, new_node); + return clone_with_new_inputs(new_node, new_inputs); +} + +ExpressionPtr Expression::clone() const { + return std::shared_ptr(new Expression(*this)); +} + +bool Expression::visit_attributes(AttributeVisitor &visitor) { + auto is_planar_layout = [](const std::vector& layout) { + for (size_t i = 0; i < layout.size(); ++i) + if (layout[i] != i) return false; + return true; + }; + auto subtensor2str = [](const VectorDims& subtensor) { + std::stringstream ss; + for (size_t i = 0; i < subtensor.size(); ++i) { + const auto& v = subtensor[i]; + const auto v_str = utils::is_full_dim_value(v) ? "FULL_DIM" : + utils::is_dynamic_value(v) ? "?" : std::to_string(v); + const auto del = i < subtensor.size() - 1 ? 
", " : ""; + ss << v_str << del; + } + return ss.str(); + }; + + std::vector in_regs, out_regs; + std::vector in_reg_types, out_reg_types; + std::vector> shapes; + std::vector> subtensors; + std::vector>> layouts; + for (size_t i = 0; i < get_input_count(); i++) { + const auto& desc = m_input_port_descriptors[i]; + const auto& shape = desc->get_shape(); + if (!shape.empty()) + shapes.emplace_back("in_shape_" + std::to_string(i), ov::PartialShape(shape)); + + const auto& subtensor = desc->get_subtensor(); + if (!subtensor.empty()) + subtensors.emplace_back("in_subtensor_" + std::to_string(i), subtensor2str(subtensor)); + + const auto& layout = desc->get_layout(); + if (!layout.empty() && !is_planar_layout(layout)) + layouts.emplace_back("in_layout_" + std::to_string(i), layout); + + in_reg_types.emplace_back(regTypeToStr(desc->get_reg().type)); + in_regs.emplace_back(desc->get_reg().idx); + } + for (size_t i = 0; i < get_output_count(); i++) { + const auto& desc = m_output_port_descriptors[i]; + const auto& shape = desc->get_shape(); + if (!shape.empty()) + shapes.emplace_back("out_shape_" + std::to_string(i), ov::PartialShape(shape)); + + const auto& subtensor = desc->get_subtensor(); + if (!subtensor.empty()) + subtensors.emplace_back("out_subtensor_" + std::to_string(i), subtensor2str(subtensor)); + + const auto& layout = desc->get_layout(); + if (!layout.empty() && !is_planar_layout(layout)) + layouts.emplace_back("out_layout_" + std::to_string(i), layout); + + out_reg_types.emplace_back(regTypeToStr(desc->get_reg().type)); + out_regs.emplace_back(desc->get_reg().idx); + } + + if (!in_regs.empty()) { + visitor.on_attribute("in_regs", in_regs); + visitor.on_attribute("in_reg_types", in_reg_types); + } + if (!out_regs.empty()) { + visitor.on_attribute("out_regs", out_regs); + visitor.on_attribute("out_reg_types", out_reg_types); + } + for (auto& s : shapes) + visitor.on_attribute(s.first, s.second); + for (auto& s : subtensors) + visitor.on_attribute(s.first, 
s.second); + for (auto& s : layouts) + visitor.on_attribute(s.first, s.second); + visitor.on_attribute("loop_ids", m_loop_ids); + visitor.on_attribute("execution_number", m_exec_num); + m_source_node->visit_attributes(visitor); + return true; } ExpressionPort Expression::get_input_port(size_t i) { diff --git a/src/common/snippets/src/lowered/expression_factory.cpp b/src/common/snippets/src/lowered/expression_factory.cpp index da60f9ac701b5f..c6ba395909c9f3 100644 --- a/src/common/snippets/src/lowered/expression_factory.cpp +++ b/src/common/snippets/src/lowered/expression_factory.cpp @@ -10,22 +10,29 @@ namespace ov { namespace snippets { namespace lowered { -void LinearIR::ExpressionFactory::create_expression_inputs(const LinearIR& linear_ir, const ExpressionPtr& expr) { - OPENVINO_ASSERT(expr != nullptr, "Failed expression inputs creation: expression is null"); - const auto& node = expr->get_node(); - - expr->m_input_port_connectors.resize(node->get_input_size(), nullptr); - for (const auto& input : node->inputs()) { - const auto input_source = input.get_source_output(); - const auto in_index = input.get_index(); - const auto& parent_expr = linear_ir.get_expr_by_node(input_source.get_node_shared_ptr()); - const auto& port_connector = parent_expr->get_output_port_connector(input_source.get_index()); - port_connector->add_consumer(expr->get_input_port(in_index)); - expr->m_input_port_connectors[in_index] = port_connector; +template<> +std::shared_ptr ExpressionFactory::build(const std::shared_ptr& n, const std::vector& inputs) { + if (const auto par = ov::as_type_ptr(n)) { + return create(par, inputs, m_shape_infer_factory); + } else if (const auto res = ov::as_type_ptr(n)) { + return create(res, inputs, m_shape_infer_factory); + } else if (const auto loop_begin = ov::as_type_ptr(n)) { + return create(loop_begin, inputs, m_shape_infer_factory); + } else if (const auto loop_end = ov::as_type_ptr(n)) { + return create(loop_end, inputs, m_shape_infer_factory); + } else 
if (const auto buffer = ov::as_type_ptr(n)) { + return create(buffer, inputs, m_shape_infer_factory); +#ifdef SNIPPETS_DEBUG_CAPS + } else if (const auto perf_counter = ov::as_type_ptr(n)) { + return create(perf_counter, inputs, m_shape_infer_factory); + } else if (const auto perf_counter = ov::as_type_ptr(n)) { + return create(perf_counter, inputs, m_shape_infer_factory); +#endif } + return create<>(n, inputs, m_shape_infer_factory); } -void LinearIR::ExpressionFactory::create_expression_outputs(const ExpressionPtr& expr) { +void ExpressionFactory::create_expression_outputs(const ExpressionPtr& expr) { OPENVINO_ASSERT(expr != nullptr, "Failed expression outputs creation: expression is null"); const auto& node = expr->get_node(); @@ -38,7 +45,7 @@ void LinearIR::ExpressionFactory::create_expression_outputs(const ExpressionPtr& } // The method verifies of input port connectors to availability of the expression as consumer and add it if missed -void LinearIR::ExpressionFactory::init_expression_inputs(const ExpressionPtr& expr, const std::vector& inputs) { +void ExpressionFactory::init_expression_inputs(const ExpressionPtr& expr, const std::vector& inputs) { for (size_t i = 0; i < inputs.size(); ++i) { const auto& input = inputs[i]; const auto consumers = input->get_consumers(); @@ -53,18 +60,21 @@ void LinearIR::ExpressionFactory::init_expression_inputs(const ExpressionPtr& ex expr->m_input_port_connectors = inputs; } -ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& par, const LinearIR& linear_ir) { +ExpressionPtr ExpressionFactory::create(const std::shared_ptr& par, const std::vector& inputs, + const std::shared_ptr& shape_infer_factory) { + OPENVINO_ASSERT(inputs.empty(), "Parameter cannot have inputs"); // Note: ctor of shared_ptr isn't friend class for Expression -> we cannot use directly make_shared(args) - auto expr = std::shared_ptr(new Expression(par, linear_ir.m_shape_infer_factory, false)); + auto expr = std::shared_ptr(new 
Expression(par, shape_infer_factory, false)); create_expression_outputs(expr); expr->validate(); return expr; } -ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& res, const LinearIR& linear_ir) { +ExpressionPtr ExpressionFactory::create(const std::shared_ptr& res, const std::vector& inputs, + const std::shared_ptr& shape_infer_factory) { // Note: ctor of shared_ptr isn't friend class for Expression -> we cannot use directly make_shared(args) - auto expr = std::shared_ptr(new Expression(res, linear_ir.m_shape_infer_factory)); - create_expression_inputs(linear_ir, expr); + auto expr = std::shared_ptr(new Expression(res, shape_infer_factory)); + init_expression_inputs(expr, inputs); // The Result node don't need output port (because of sense of the node). But each node in openvino must have one output at least. // The port descriptors are automatically created in constructor. We manually clean output ports. expr->m_output_port_descriptors.clear(); @@ -72,31 +82,19 @@ ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, const LinearIR& linear_ir) { - OPENVINO_ASSERT(!ov::is_type(n), "Default expression builder doesn't support LoopBegin and LoopEnd"); - // Note: ctor of shared_ptr isn't friend class for Expression - auto expr = std::shared_ptr(new Expression(n, linear_ir.m_shape_infer_factory)); - create_expression_inputs(linear_ir, expr); - create_expression_outputs(expr); - expr->validate(); - return expr; -} - -ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, - const std::vector& inputs, - const LinearIR& linear_ir) { +ExpressionPtr ExpressionFactory::create(const std::shared_ptr& n, const std::vector& inputs, + const std::shared_ptr& shape_infer_factory) { OPENVINO_ASSERT(inputs.empty(), "LoopBegin cannot have inputs"); - auto expr = std::shared_ptr(new Expression(n, linear_ir.m_shape_infer_factory, false)); + auto expr = std::shared_ptr(new Expression(n, shape_infer_factory, false)); 
init_expression_inputs(expr, inputs); create_expression_outputs(expr); expr->validate(); return expr; } -ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, - const std::vector& inputs, - const LinearIR& linear_ir) { - auto expr = std::shared_ptr(new Expression(n, linear_ir.m_shape_infer_factory, false)); +ExpressionPtr ExpressionFactory::create(const std::shared_ptr& n, const std::vector& inputs, + const std::shared_ptr& shape_infer_factory) { + auto expr = std::shared_ptr(new Expression(n, shape_infer_factory, false)); expr->m_input_port_descriptors.resize(inputs.size(), nullptr); for (size_t i = 0; i < inputs.size() - 1; ++i) { expr->m_input_port_descriptors[i] = std::make_shared(); @@ -113,23 +111,22 @@ ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, - const std::vector& inputs, - const LinearIR& linear_ir) { - OPENVINO_ASSERT(inputs.empty(), "PerfCountBegin factory do not accept any input connectors"); - return create_without_connections(n, linear_ir); +ExpressionPtr ExpressionFactory::create(const std::shared_ptr& n, const std::vector& inputs, + const std::shared_ptr& shape_infer_factory) { + OPENVINO_ASSERT(inputs.empty(), "PerfCountBegin shape_infer_factory do not accept any input connectors"); + return create_without_connections(n, shape_infer_factory); } -ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, - const std::vector& inputs, - const LinearIR& linear_ir) { - OPENVINO_ASSERT(inputs.empty(), "PerfCountEnd factory do not accept any input connectors"); - return create_without_connections(n, linear_ir); +ExpressionPtr ExpressionFactory::create(const std::shared_ptr& n, + const std::vector& inputs, + const std::shared_ptr& shape_infer_factory) { + OPENVINO_ASSERT(inputs.empty(), "PerfCountEnd shape_infer_factory do not accept any input connectors"); + return create_without_connections(n, shape_infer_factory); } -ExpressionPtr 
LinearIR::ExpressionFactory::create_without_connections(const std::shared_ptr& n, - const LinearIR& linear_ir) { - auto expr = std::shared_ptr(new Expression(n, linear_ir.m_shape_infer_factory, false)); +ExpressionPtr ExpressionFactory::create_without_connections(const std::shared_ptr& n, + const std::shared_ptr& shape_infer_factory) { + auto expr = std::shared_ptr(new Expression(n, shape_infer_factory, false)); expr->m_input_port_descriptors.clear(); expr->m_output_port_descriptors.clear(); expr->validate(); @@ -137,22 +134,6 @@ ExpressionPtr LinearIR::ExpressionFactory::create_without_connections(const std: } #endif -ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr& n, - const std::vector& inputs, - const LinearIR& linear_ir) { - OPENVINO_ASSERT(!ov::is_type(n) && - !ov::is_type(n), - "Expression builder with inputs doesn't support Result and Parameter"); - auto expr = std::shared_ptr(new Expression(n, linear_ir.m_shape_infer_factory)); - init_expression_inputs(expr, inputs); - create_expression_outputs(expr); - expr->validate(); - // todo: here we blindly synchronize input shapes from parent and child. 
Remove this when shapes will be stored in - // port connector itself - if (linear_ir.m_shape_infer_factory) - expr->updateShapes(); - return expr; -} }// namespace lowered }// namespace snippets }// namespace ov diff --git a/src/common/snippets/src/lowered/expressions/buffer_expression.cpp b/src/common/snippets/src/lowered/expressions/buffer_expression.cpp new file mode 100644 index 00000000000000..7bf2b00da7d6ed --- /dev/null +++ b/src/common/snippets/src/lowered/expressions/buffer_expression.cpp @@ -0,0 +1,143 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + + +#include "snippets/lowered/expressions/buffer_expression.hpp" + +#include "snippets/lowered/loop_manager.hpp" +#include "snippets/op/buffer.hpp" + + +namespace ov { +namespace snippets { +namespace lowered { + +BufferExpression::BufferExpression(const BufferExpression& other) + : Expression(other), m_allocation_size(other.m_allocation_size), m_reg_group(other.m_reg_group), + m_cluster_id(other.m_cluster_id), m_offset(other.m_offset) {} + +BufferExpression::BufferExpression(const std::shared_ptr& n, const std::shared_ptr& factory) + : Expression(n, factory) { + const auto& buffer = ov::as_type_ptr(get_node()); + OPENVINO_ASSERT(buffer, "BufferExpression expects Buffer op"); + m_allocation_size = buffer->get_allocation_size(); +} + +ExpressionPtr BufferExpression::clone() const { + return std::shared_ptr(new BufferExpression(*this)); +} + +void BufferExpression::validate_attributes() const { + Expression::validate_attributes(); + OPENVINO_ASSERT(ov::is_type(get_node()), "BufferExpression expects Buffer op"); +} + +bool BufferExpression::visit_attributes(AttributeVisitor &visitor) { + auto allocation_size = utils::value2str(m_allocation_size); + auto offset = utils::value2str(m_offset); + visitor.on_attribute("allocation_size", allocation_size); + visitor.on_attribute("offset", offset); + visitor.on_attribute("reg_group", m_reg_group); + 
visitor.on_attribute("cluster_id", m_cluster_id); + return true; +} + +bool BufferExpression::is_defined() const { + return !utils::is_dynamic_value(m_allocation_size); +} + +size_t BufferExpression::get_byte_size() const { + if (is_defined()) + return m_allocation_size * get_node()->get_output_element_type(0).size(); + return utils::get_dynamic_value(); +} + +namespace { +std::vector get_parent_inner_loops(const std::vector& parent_loops, const std::vector& current_loops) { + const auto common_rank = std::min(parent_loops.size(), current_loops.size()); + size_t i = 0; + while (i < common_rank && parent_loops[i] == current_loops[i]) + ++i; + return std::vector(parent_loops.cbegin() + i, parent_loops.cend()); +} +} // namespace + +// Ticket: 113744 +// TODO: This logic covers only several specific cases so it should be generalized. +void BufferExpression::init_allocation_size(const std::shared_ptr& loop_manager, size_t allocation_rank) { + // Note: Buffer expressions can have more than one parent after the loops splitting transformation, but only the last parent + // can be used to access valid loop ports. More info in the ticket: 146646 + const auto buffer_in_idx = get_input_count() - 1; + const auto& parent_port = get_input_port_connector(buffer_in_idx)->get_source(); + const auto& parent_loop_ids = get_parent_inner_loops(parent_port.get_expr()->get_loop_ids(), get_loop_ids()); + const auto planar_shape = utils::get_preordered_vdims(parent_port); + + const size_t rank = allocation_rank >= 0 ? 
std::min(static_cast(allocation_rank), planar_shape.size()) + : planar_shape.size(); + + const auto& subtensor = ov::snippets::utils::get_projected_subtensor(parent_port); + + auto hard_equal = [&parent_port](const LoopPort& port) { + return *port.expr_port == parent_port; + }; + auto soft_equal = [&](const LoopPort& loop_port) { + const auto& port = *loop_port.expr_port; + // Check semantic of LoopPort + if (parent_port.get_index() != port.get_index() || + port.get_expr()->get_node()->get_type_info() != parent_port.get_expr()->get_node()->get_type_info()) + return false; + // Check that this LoopPort is connected to the same by semantic Buffer + const auto consumers = port.get_connected_ports(); + for (const auto& consumer : consumers) { + if (const auto buffer_consumer = ov::as_type_ptr(consumer.get_expr())) { + if (buffer_consumer->get_cluster_id() == m_cluster_id && consumer.get_index() == buffer_in_idx) + return true; + } + } + return false; + }; + + m_allocation_size = 1; + std::set processed_dim_idxs; + for (const auto& parent_loop : parent_loop_ids) { + const auto loop_info = loop_manager->get_loop_info(parent_loop); + const auto& output_ports = loop_info->get_output_ports(); + auto it = std::find_if(output_ports.begin(), output_ports.end(), hard_equal); + // [149219] : Try to find original loop port if this LoopInfo is cloned after InsertSpecificIterations + // and ports are not mapped on the original ExpressionPorts + if (it == output_ports.end()) { + it = std::find_if(output_ports.begin(), output_ports.end(), soft_equal); + OPENVINO_ASSERT(it != output_ports.end(), "compute_allocation_shape: output port of parent loop can not be found"); + } + const auto& loop_port = *it; + const auto& dim_idx = loop_port.dim_idx; + if (loop_port.is_incremented && dim_idx < rank) { + if (const auto& unified_loop_info = ov::as_type_ptr(loop_info)) + m_allocation_size = utils::dynamic_safe_mul(m_allocation_size, unified_loop_info->get_work_amount()); + else if (const auto& 
expanded_loop_info = ov::as_type_ptr(loop_info)) + m_allocation_size = utils::dynamic_safe_mul(m_allocation_size, expanded_loop_info->get_unified_loop_info()->get_work_amount()); + else + OPENVINO_THROW("Unknown LoopInfo type"); + processed_dim_idxs.insert(dim_idx); + } + } + const auto processing_rank = !processed_dim_idxs.empty() ? std::max(*processed_dim_idxs.rbegin(), subtensor.size()) : subtensor.size(); + for (size_t i = 0; i < std::min(processing_rank, rank); ++i) { + if (processed_dim_idxs.count(i) == 0) { + const auto multiplier = i < subtensor.size() ? *(subtensor.rbegin() + i) : *(planar_shape.rbegin() + i); + m_allocation_size = utils::dynamic_safe_mul(m_allocation_size, multiplier); + } + } + + // Corner case when the current information is not enough + if (processing_rank == 0 && processed_dim_idxs.empty()) { + for (size_t i = 0; i < rank; ++i) { + m_allocation_size = utils::dynamic_safe_mul(m_allocation_size, *(planar_shape.rbegin() + i)); + } + } +} + +} // namespace lowered +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp index 09640196b1fa17..6c5afadecb0285 100644 --- a/src/common/snippets/src/lowered/linear_ir.cpp +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -25,7 +25,8 @@ LinearIR::LinearIR(Config config, const std::shared_ptr()), m_shape_infer_factory(factory), - m_shape_infer(std::make_shared(m_expressions, m_parameter_expressions, m_result_expressions)) {} + m_shape_infer(std::make_shared(m_expressions, m_parameter_expressions, m_result_expressions)), + m_expression_factory(std::make_shared(m_shape_infer_factory)) {} LinearIR::LinearIR(const std::shared_ptr& model, const std::shared_ptr& factory, @@ -34,7 +35,7 @@ LinearIR::LinearIR(const std::shared_ptr& model, constExprIt last_param = m_expressions.end(); for (const auto& n : get_ordered_ops(model)) { constExprIt insertion_pos = m_expressions.end(); - const auto expr = 
create_expression(n); + const auto expr = get_expr_factory()->build<>(n, get_expression_inputs_by_node(n)); // Scalar should be on the Linear IR beginning after Parameters to have valid expression order after Loop passes. // After these passes we must call pass MoveScalarToConsumer() to have a correct accuracy. @@ -43,8 +44,11 @@ LinearIR::LinearIR(const std::shared_ptr& model, insertion_pos = std::next(last_param); } - // exec_num = 0 since `insertion_pos` can be changed - register_expression(expr, true, 0); + // Some utils containers (for example, buffers) in Lir contain expressions in execution order + // so we have to pass exec order to registration. However, this enumeration is not optimal because + // the next each expr will has exec_num = prev_expr->exec_num + 1. + // For more efficient execution ordering we have to call "enumerate_expressions" in the end of LIR initialization + register_expression(expr, true, get_inserted_expr_exec_num(insertion_pos)); const auto& it = m_expressions.insert(insertion_pos, expr); if (ov::is_type(n)) last_param = it; @@ -57,12 +61,21 @@ LinearIR::LinearIR(const std::shared_ptr& model, enumerate_expressions(); } -ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n) { - return ExpressionFactory::build(n, *this); +const ExpressionFactoryPtr& LinearIR::get_expr_factory() const { + OPENVINO_ASSERT(m_expression_factory, "ExpresstionFactory is missed!"); + return m_expression_factory; } -ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const std::vector& inputs) const { - return ExpressionFactory::build(n, inputs, *this); +std::vector LinearIR::get_expression_inputs_by_node(const std::shared_ptr& n) const { + OPENVINO_ASSERT(n != nullptr, "Failed expression inputs getting: node is null"); + std::vector inputs(n->get_input_size(), nullptr); + for (const auto& input : n->inputs()) { + const auto input_source = input.get_source_output(); + const auto in_index = input.get_index(); + const auto& 
parent_expr = get_expr_by_node(input_source.get_node_shared_ptr()); + inputs[in_index] = parent_expr->get_output_port_connector(input_source.get_index()); + } + return inputs; } namespace { @@ -84,7 +97,7 @@ void update_consumers_and_regs(const ExpressionPtr& new_expr, const std::vector< ExpressionPtr LinearIR::create_expression(const std::shared_ptr& n, const std::vector& new_inputs, const std::vector& loop_ids, bool update_loop_ports, const std::vector>& consumers) { - const auto new_expr = create_expression(n, new_inputs); + const auto new_expr = get_expr_factory()->build<>(n, new_inputs); update_consumers_and_regs(new_expr, consumers); new_expr->set_loop_ids(loop_ids); @@ -178,13 +191,20 @@ void LinearIR::register_expression(const ExpressionPtr& expr, bool io_allowed, d "LinearIR::insert can't be used to add Parameters or Results to IR"); const auto& res = m_node2expression_map.insert({node, expr}); OPENVINO_ASSERT(res.second, "Duplicate node is detected in linear IR: ", node); + + expr->m_exec_num = exec_num; + if (ov::is_type(node)) m_parameter_expressions.push_back(expr); if (ov::is_type(node)) m_result_expressions.push_back(expr); - if (ov::is_type(node)) - m_buffer_expressions.push_back(expr); - expr->m_exec_num = exec_num; + if (const auto buffer_expr = ov::as_type_ptr(expr)) { + // just to align with execution order + auto it = m_buffer_expressions.cbegin(); + while (it != m_buffer_expressions.cend() && expr->m_exec_num > (*it)->get_exec_num()) + ++it; + m_buffer_expressions.insert(it, buffer_expr); + } } void LinearIR::unregister_expression(const ExpressionPtr& expr) { @@ -197,9 +217,9 @@ void LinearIR::unregister_expression(const ExpressionPtr& expr) { m_node2expression_map.erase(node); OPENVINO_ASSERT(!ov::is_type(node) && !ov::is_type(node), "unregister_expression mustn't be called for parameter or result expressions"); - if (ov::is_type(node)) { - const auto& it = std::find(m_buffer_expressions.cbegin(), m_buffer_expressions.cend(), expr); - 
OPENVINO_ASSERT(it != m_buffer_expressions.cend(), "Buffer Expression has not been found in the list of LinearIR Buffers!"); + if (const auto buffer_expr = ov::as_type_ptr(expr)) { + const auto& it = std::find(m_buffer_expressions.cbegin(), m_buffer_expressions.cend(), buffer_expr); + OPENVINO_ASSERT(it != m_buffer_expressions.cend(), "BufferExpression has not been found in the list of LinearIR Buffers!"); m_buffer_expressions.erase(it); } } @@ -245,7 +265,7 @@ LinearIR::exprIt LinearIR::insert(LinearIR::constExprIt pos, const NodeVector& n } LinearIR::exprIt LinearIR::insert(LinearIR::constExprIt pos, const std::shared_ptr& n) { - const auto& expr = create_expression(n); + const auto& expr = get_expr_factory()->build<>(n, get_expression_inputs_by_node(n)); register_expression(expr, m_config.m_manual_build_support, get_inserted_expr_exec_num(pos)); return m_expressions.insert(pos, expr); } @@ -338,6 +358,18 @@ LinearIR::exprIt LinearIR::insert_node(const std::shared_ptr& new_node return insert_node(new_node, new_inputs, loop_ids, update_loop_ports, place, consumers); } +LinearIR::exprIt LinearIR::insert_expr(const ExpressionPtr& new_expr, const std::vector& loop_ids, + bool update_loop_ports, const constExprIt& place, const std::vector>& consumers) { + update_consumers_and_regs(new_expr, consumers); + new_expr->set_loop_ids(loop_ids); + + const auto expr_it = insert(place, new_expr); + if (update_loop_ports) + get_loop_manager()->update_loop_ports(new_expr); + + return expr_it; +} + LinearIR::exprIt LinearIR::replace_with_node(const std::vector& old_exprs, const std::shared_ptr& new_node, const std::vector& loop_ids, const constExprIt& place) { OPENVINO_ASSERT(!old_exprs.empty(), "Failed to replace node: there are no old expressions for replacing"); diff --git a/src/common/snippets/src/lowered/pass/allocate_buffers.cpp b/src/common/snippets/src/lowered/pass/allocate_buffers.cpp index d72e35ceac533b..f76c4097b38f38 100644 --- 
a/src/common/snippets/src/lowered/pass/allocate_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/allocate_buffers.cpp @@ -28,7 +28,7 @@ bool AllocateBuffers::run(lowered::LinearIR& linear_ir, lowered::LinearIR::const size_t buffer_scratchpad_size = 0; PassPipeline pipeline; - pipeline.register_pass(linear_ir.get_config().m_loop_depth); + pipeline.register_pass(); if (m_is_optimized_mode) { pipeline.register_pass(); pipeline.register_pass(); diff --git a/src/common/snippets/src/lowered/pass/assign_registers.cpp b/src/common/snippets/src/lowered/pass/assign_registers.cpp index e071460e5d85f1..2f921214bffed4 100644 --- a/src/common/snippets/src/lowered/pass/assign_registers.cpp +++ b/src/common/snippets/src/lowered/pass/assign_registers.cpp @@ -84,25 +84,22 @@ bool AssignRegisters::run(LinearIR& linear_ir) { auto accumulator_reg = 0lu; for (const auto& expr : exprs) { auto op = expr->get_node(); - if (const auto& buffer = ov::as_type_ptr(op)) { - const auto reg_group = buffer->get_reg_group(); + if (const auto& buffer_expr = ov::as_type_ptr(expr)) { + const auto reg_group = buffer_expr->get_reg_group(); // All buffers have one common data pointer - if (ov::is_type(buffer)) { - const auto assigned_reg = num_results + num_parameters + reg_group; - for (const auto& input : expr->get_input_port_connectors()) { - manually_assigned_gprs[input] = static_cast(assigned_reg); - // shape infer ops in the middle of subgraph. IntermediateMemoryBuffer is inserted before reshape as new loop should start. - // child shape info ops share the same memory as IntermediateMemoryBuffer. 
- const auto& shape_infer_consumers = utils::get_first_child_shape_infer_expr_seq(expr); - for (const auto& child_shape_infer_expr : shape_infer_consumers) { - manually_assigned_gprs[child_shape_infer_expr->get_input_port_connector(0)] = - manually_assigned_gprs[child_shape_infer_expr->get_output_port_connector(0)] = - static_cast(assigned_reg); - } + const auto assigned_reg = num_results + num_parameters + reg_group; + for (const auto& input : expr->get_input_port_connectors()) { + manually_assigned_gprs[input] = static_cast(assigned_reg); + // shape infer ops in the middle of subgraph. Buffer is inserted before reshape as new loop should start. + // child shape infer ops share the same memory as Buffer. + const auto& shape_infer_consumers = utils::get_first_child_shape_infer_expr_seq(expr); + for (const auto& child_shape_infer_expr : shape_infer_consumers) { + manually_assigned_gprs[child_shape_infer_expr->get_input_port_connector(0)] = + manually_assigned_gprs[child_shape_infer_expr->get_output_port_connector(0)] = + static_cast(assigned_reg); + } } - manually_assigned_gprs[expr->get_output_port_connector(0)] = - static_cast(num_results + num_parameters + reg_group); + manually_assigned_gprs[expr->get_output_port_connector(0)] = static_cast(assigned_reg); } else if (ov::is_type(op) || ov::is_type(op)) { // Only in ReduceDecomposition Reduce ops use HorizonMax/HorizonSum and VectorBuffer.
// We should manually set the one vector register for VectorBuffer and Max/Sum output to simulate a accumulator diff --git a/src/common/snippets/src/lowered/pass/clean_repeated_ptr_shifts.cpp b/src/common/snippets/src/lowered/pass/clean_repeated_ptr_shifts.cpp index 4cf201047d63f5..e0397b03224bc3 100644 --- a/src/common/snippets/src/lowered/pass/clean_repeated_ptr_shifts.cpp +++ b/src/common/snippets/src/lowered/pass/clean_repeated_ptr_shifts.cpp @@ -32,10 +32,10 @@ bool CleanRepeatedDataPointerShifts::reuse_increments(const LoopManagerPtr& loop std::set read_data_exprs; for (size_t i = 0; i < input_count; ++i) { const auto& parent_output = loop_connectors[i]->get_source().get_expr(); - if (const auto buffer = ov::as_type_ptr(parent_output->get_node())) { + if (const auto buffer_expr = ov::as_type_ptr(parent_output)) { // If Buffer is missed in set, Just save - it's first meeting - if (buffers_groups.count(buffer->get_reg_group()) == 0) { - buffers_groups.insert(buffer->get_reg_group()); + if (buffers_groups.count(buffer_expr->get_reg_group()) == 0) { + buffers_groups.insert(buffer_expr->get_reg_group()); } else { // The Buffer with the same ID is in set - need to add this Buffer idx to set of Buffers for resetting resetting_data_indexes.insert(i); @@ -56,17 +56,17 @@ bool CleanRepeatedDataPointerShifts::reuse_increments(const LoopManagerPtr& loop size_t buffer_count = 0; size_t loop_count = 0; for (const auto& consumer_input : consumer_inputs) { - const auto& child_node = consumer_input.get_expr()->get_node(); - if (const auto buffer = ov::as_type_ptr(child_node)) { + const auto& consumer = consumer_input.get_expr(); + if (const auto buffer_expr = ov::as_type_ptr(consumer)) { buffer_count++; // If Buffer is missed in set, Just save - it's first meeting - if (buffers_groups.count(buffer->get_reg_group()) == 0) { - buffers_groups.insert(buffer->get_reg_group()); + if (buffers_groups.count(buffer_expr->get_reg_group()) == 0) { + 
buffers_groups.insert(buffer_expr->get_reg_group()); } else { // The Buffer with the same ID is in set - need to add this Buffer idx to set of Buffers for resetting resetting_data_indexes.insert(input_count + i); } - } else if (ov::is_type(child_node)) { + } else if (ov::is_type(consumer->get_node())) { loop_count++; } } diff --git a/src/common/snippets/src/lowered/pass/compute_buffer_allocation_size.cpp b/src/common/snippets/src/lowered/pass/compute_buffer_allocation_size.cpp index 85bbed324a9865..c6f0b9bcb936cb 100644 --- a/src/common/snippets/src/lowered/pass/compute_buffer_allocation_size.cpp +++ b/src/common/snippets/src/lowered/pass/compute_buffer_allocation_size.cpp @@ -14,112 +14,16 @@ namespace snippets { namespace lowered { namespace pass { -namespace { -std::vector get_parent_inner_loops(const std::vector& parent_loops, const std::vector& current_loops) { - const auto common_rank = std::min(parent_loops.size(), current_loops.size()); - size_t i = 0; - while (i < common_rank && parent_loops[i] == current_loops[i]) - ++i; - return std::vector(parent_loops.cbegin() + i, parent_loops.cend()); -} -} // namespace - -// Ticket: 113744 -// TODO: This logic covers only several specific cases so it should be generalized. -size_t ComputeBufferAllocationSize::get_allocation_size(const LoopManagerPtr& loop_manager, const ExpressionPtr& buffer_expr, size_t allocation_rank) { - const auto& current_buffer = ov::as_type_ptr(buffer_expr->get_node()); - OPENVINO_ASSERT(current_buffer, "`get_allocation_size` expected Buffer"); - - // Note: Buffer expressions can have more than one parent after the loops splitting transformation, but only the last parent - // can be used to access valid loop ports. 
More info in the ticket: 146646 - const auto buffer_in_idx = buffer_expr->get_input_count() - 1; - const auto& parent_port = buffer_expr->get_input_port_connector(buffer_in_idx)->get_source(); - const auto& parent_loop_ids = get_parent_inner_loops(parent_port.get_expr()->get_loop_ids(), buffer_expr->get_loop_ids()); - const auto planar_shape = utils::get_preordered_vdims(parent_port); - - const size_t rank = allocation_rank >= 0 ? std::min(static_cast(allocation_rank), planar_shape.size()) - : planar_shape.size(); - - const auto& subtensor = ov::snippets::utils::get_projected_subtensor(parent_port); - - auto hard_equal = [&parent_port](const LoopPort& port) { - return *port.expr_port == parent_port; - }; - auto soft_equal = [&](const LoopPort& loop_port) { - const auto& port = *loop_port.expr_port; - // Check semantic of LoopPort - if (parent_port.get_index() != port.get_index() || - port.get_expr()->get_node()->get_type_info() != parent_port.get_expr()->get_node()->get_type_info()) - return false; - // Check that this LoopPort is connected to the same by semantic Buffer - const auto consumers = port.get_connected_ports(); - for (const auto& consumer : consumers) { - if (const auto buffer_consumer = ov::as_type_ptr(consumer.get_expr()->get_node())) { - if (buffer_consumer->get_cluster_id() == current_buffer->get_cluster_id() && consumer.get_index() == buffer_in_idx) - return true; - } - } - return false; - }; - - size_t allocation_size = 1; - std::set processed_dim_idxs; - for (const auto& parent_loop : parent_loop_ids) { - const auto loop_info = loop_manager->get_loop_info(parent_loop); - const auto& output_ports = loop_info->get_output_ports(); - auto it = std::find_if(output_ports.begin(), output_ports.end(), hard_equal); - // [149219] : Try to find original loop port if this LoopInfo is cloned after InsertSpecificIterations - // and ports are not mapped on the original ExpressionPorts - if (it == output_ports.end()) { - it = std::find_if(output_ports.begin(), 
output_ports.end(), soft_equal); - OPENVINO_ASSERT(it != output_ports.end(), "compute_allocation_shape: output port of parent loop can not be found"); - } - const auto& loop_port = *it; - const auto& dim_idx = loop_port.dim_idx; - if (loop_port.is_incremented && dim_idx < rank) { - if (const auto& unified_loop_info = ov::as_type_ptr(loop_info)) - allocation_size = utils::dynamic_safe_mul(allocation_size, unified_loop_info->get_work_amount()); - else if (const auto& expanded_loop_info = ov::as_type_ptr(loop_info)) - allocation_size = utils::dynamic_safe_mul(allocation_size, expanded_loop_info->get_unified_loop_info()->get_work_amount()); - else - OPENVINO_THROW("Unknown LoopInfo type"); - processed_dim_idxs.insert(dim_idx); - } - } - const auto processing_rank = !processed_dim_idxs.empty() ? std::max(*processed_dim_idxs.rbegin(), subtensor.size()) : subtensor.size(); - for (size_t i = 0; i < std::min(processing_rank, rank); ++i) { - if (processed_dim_idxs.count(i) == 0) { - const auto multiplier = i < subtensor.size() ? 
*(subtensor.rbegin() + i) : *(planar_shape.rbegin() + i); - allocation_size = utils::dynamic_safe_mul(allocation_size, multiplier); - } - } - - // Corner case when the current information is not enough - if (processing_rank == 0 && processed_dim_idxs.empty()) { - for (size_t i = 0; i < rank; ++i) { - allocation_size = utils::dynamic_safe_mul(allocation_size, *(planar_shape.rbegin() + i)); - } - } - - return allocation_size; -} - bool ComputeBufferAllocationSize::run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ComputeBufferAllocationSize") + const auto& allocation_rank = linear_ir.get_config().m_loop_depth; const auto& loop_manager = linear_ir.get_loop_manager(); - - const auto& buffer_expressions = linear_ir.get_buffers(); - for (const auto& buffer_expr : buffer_expressions) { - const auto node = buffer_expr->get_node(); - if (const auto buffer = ov::as_type_ptr(node)) { - // If the current size is undefined, update it - // TODO [143395] : MemoryManager will return container with only dynamic buffers without any `is_defined()` - if (!buffer->is_defined()) - buffer->set_allocation_size(get_allocation_size(loop_manager, buffer_expr, m_buffer_allocation_rank)); - } else { - OPENVINO_ASSERT(ov::is_type(node), "Expected Buffer ops in Buffer expressions of LinearIR"); - } + for (const auto& buffer_expr : linear_ir.get_buffers()) { + // If the current size is undefined, update it + // TODO [143395] : MemoryManager will return container with only dynamic buffers without any `is_defined()` + if (!buffer_expr->is_defined()) + buffer_expr->init_allocation_size(loop_manager, allocation_rank); } return true; diff --git a/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp b/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp index f3e065173baf9d..c43b5d63a358c6 100644 --- 
a/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp +++ b/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp @@ -16,17 +16,16 @@ namespace pass { using ShiftPtrParams = SetBufferRegGroup::ShiftPtrParams; -DefineBufferClusters::BufferClusters::iterator DefineBufferClusters::find_cluster_by_expr(const ExpressionPtr& target) { +DefineBufferClusters::BufferClusters::iterator DefineBufferClusters::find_cluster_by_expr(const BufferExpressionPtr& target) { return std::find_if(m_clusters.begin(), m_clusters.end(), [&target](const BufferCluster& cluster) { return cluster.count(target) > 0; }); } -bool DefineBufferClusters::is_direct_buffer(const ExpressionPtr& buffer_expr, const ExpressionPtr& target_expr) const { - const auto buffer = ov::as_type_ptr(buffer_expr->get_node()); - return buffer && buffer_expr->get_loop_ids() == target_expr->get_loop_ids(); +bool DefineBufferClusters::is_direct_buffer(const BufferExpressionPtr& buffer_expr, const ExpressionPtr& target_expr) const { + return buffer_expr && buffer_expr->get_loop_ids() == target_expr->get_loop_ids(); } -void DefineBufferClusters::create_new_cluster(const ExpressionPtr& buffer_expr) { +void DefineBufferClusters::create_new_cluster(const BufferExpressionPtr& buffer_expr) { const auto cluster_it = find_cluster_by_expr(buffer_expr); // If Buffer is missed in clusters, create new cluster with the single Buffer node inside if (cluster_it == m_clusters.cend()) { @@ -36,9 +35,8 @@ void DefineBufferClusters::create_new_cluster(const ExpressionPtr& buffer_expr) size_t DefineBufferClusters::get_cluster_buffer_id(const BufferCluster& cluster) const { OPENVINO_ASSERT(!cluster.empty(), "Buffer cluster is empty!"); - const auto id = (ov::as_type_ptr(cluster.cbegin()->get()->get_node()))->get_reg_group(); - if (std::all_of(cluster.cbegin(), cluster.cend(), - [&id](const ExpressionPtr& expr) { return (ov::as_type_ptr(expr->get_node()))->get_reg_group() == id; })) { + const auto id = 
cluster.cbegin()->get()->get_reg_group(); + if (std::all_of(cluster.cbegin(), cluster.cend(), [&id](const BufferExpressionPtr& expr) { return expr->get_reg_group() == id; })) { return id; } return SIZE_MAX; @@ -53,7 +51,7 @@ DefineBufferClusters::BufferPorts DefineBufferClusters::get_input_buffers(const // Input Buffers for (size_t i = 0; i < in_count; ++i) { - const auto source_expr = connectors[i]->get_source().get_expr(); + const auto& source_expr = ov::as_type_ptr(connectors[i]->get_source().get_expr()); if (!is_direct_buffer(source_expr, loop_expr)) continue; // Save as input Buffer @@ -74,7 +72,7 @@ DefineBufferClusters::BufferPorts DefineBufferClusters::get_output_buffers(const for (size_t i = in_count; i < in_count + out_count; ++i) { for (const auto& consumer : connectors[i]->get_consumers()) { - auto consumer_expr = consumer.get_expr(); + const auto& consumer_expr = ov::as_type_ptr(consumer.get_expr()); if (!is_direct_buffer(consumer_expr, loop_expr)) continue; // Save as output Buffer @@ -102,7 +100,6 @@ void DefineBufferClusters::parse_loop(const LinearIR::constExprIt& expr_it) { for (const auto& out : output_buffers) { const auto output_buffer_expr = out.first; const auto output_buffer_port_idx = *(out.second.cbegin()); // Output port is always one - const auto output_buffer = ov::as_type_ptr(output_buffer_expr->get_node()); bool has_been_added = false; for (const auto& in : input_buffers) { @@ -110,17 +107,15 @@ void DefineBufferClusters::parse_loop(const LinearIR::constExprIt& expr_it) { if (visited_buffers.count(input_buffer_expr) > 0) continue; - const auto input_buffer = ov::as_type_ptr(input_buffer_expr->get_node()); - // If allocated sizes of buffers are unkown on compilation stage (dynamic), // we cannot be sure that they're will be the same in runtime. 
- if (!input_buffer->is_defined()|| !output_buffer->is_defined()) + if (!input_buffer_expr->is_defined()|| !output_buffer_expr->is_defined()) continue; // Memory can be reused if reading and writing are executed proportionally: // - the same reading/writing order // - the same buffer memory sizes - if ((input_buffer->get_byte_size() != output_buffer->get_byte_size()) || + if ((input_buffer_expr->get_byte_size() != output_buffer_expr->get_byte_size()) || (input_buffer_expr->get_output_port_descriptor(0)->get_layout() != output_buffer_expr->get_input_port_descriptor(0)->get_layout())) continue; @@ -184,13 +179,13 @@ void DefineBufferClusters::parse_nested_loops(const BufferPorts& input_buffers, for (auto it = std::reverse_iterator(outer_loop_end_expr_it); (*it)->get_node() != outer_loop_begin; ++it) { const auto& inner_expr = *it; - if (const auto inner_buffer = ov::as_type_ptr(inner_expr->get_node())) { - const auto inner_cluster_it = find_cluster_by_expr(inner_expr); + if (const auto inner_buffer_expr = ov::as_type_ptr(inner_expr)) { + const auto inner_cluster_it = find_cluster_by_expr(inner_buffer_expr); OPENVINO_ASSERT(inner_cluster_it != m_clusters.cend(), "Buffer cluster has not been found"); const auto inner_cluster_id = get_cluster_buffer_id(*inner_cluster_it); if (inner_cluster_id == SIZE_MAX) continue; - const auto final_offset = get_buffer_finalization_offset(inner_expr); + const auto final_offset = get_buffer_finalization_offset(inner_buffer_expr); auto unite = [&](const BufferPorts& ports, const bool is_input) { bool applied = false; @@ -200,13 +195,13 @@ void DefineBufferClusters::parse_nested_loops(const BufferPorts& input_buffers, // If the buffers are already in the same cluster or have different Buffer ID - skip if (cluster_it == inner_cluster_it) continue; // Buffer from one cluster must be only defined (with known allocation_size) or dynamic (with unknown allocation_size) - if (inner_buffer->is_defined() != 
ov::as_type_ptr(port.first->get_node())->is_defined()) continue; + if (inner_buffer_expr->is_defined() != port.first->is_defined()) continue; bool can_be_reused = true; for (const auto idx : port.second) { can_be_reused = can_be_reused && can_be_data_ptr_proportionally_shifted(outer_ptr_increments[idx], outer_data_sizes[idx], - final_offset, inner_buffer->get_element_type().size()); + final_offset, inner_buffer_expr->get_node()->get_element_type().size()); } if (!can_be_reused) continue; @@ -223,7 +218,7 @@ void DefineBufferClusters::parse_nested_loops(const BufferPorts& input_buffers, } } -int64_t DefineBufferClusters::get_buffer_finalization_offset(const ExpressionPtr& buffer_expr) const { +int64_t DefineBufferClusters::get_buffer_finalization_offset(const BufferExpressionPtr& buffer_expr) const { auto index = [](const std::vector& loop_inputs, const PortConnectorPtr& buffer_out) { const auto it = std::find(loop_inputs.cbegin(), loop_inputs.cend(), buffer_out); OPENVINO_ASSERT(it != loop_inputs.cend(), "Buffer output PortConnector has not been found in target LoopEnd inputs"); @@ -252,7 +247,7 @@ int64_t DefineBufferClusters::get_buffer_finalization_offset(const ExpressionPtr bool DefineBufferClusters::unite_nested_clusters(const BufferClusters::iterator& inner_cluster_it, BufferCluster& outer_cluster, - const ExpressionPtr& outer_buffer, bool is_outer_up) { + const BufferExpressionPtr& outer_buffer, bool is_outer_up) { for (const auto& inner_buffer : *inner_cluster_it) { ExpressionPtr common_loop_end_expr = nullptr; size_t outer_idx = SIZE_MAX, inner_idx = SIZE_MAX; @@ -267,9 +262,8 @@ bool DefineBufferClusters::unite_nested_clusters(const BufferClusters::iterator& const auto& inner_data_sizes = common_loop_end->get_element_type_sizes(); if (SetBufferRegGroup::can_be_in_one_group({ inner_data_sizes[up_idx], inner_ptr_increments[up_idx], inner_final_offsets[up_idx] }, { inner_data_sizes[down_idx], inner_ptr_increments[down_idx], inner_final_offsets[down_idx] })) 
{ - const auto buffer_reg_group = ov::as_type_ptr(outer_buffer->get_node())->get_reg_group(); for (const auto& inner_buffer : *inner_cluster_it) - ov::as_type_ptr(inner_buffer->get_node())->set_reg_group(buffer_reg_group); + inner_buffer->set_reg_group(outer_buffer->get_reg_group()); outer_cluster.insert(inner_cluster_it->cbegin(), inner_cluster_it->cend()); m_clusters.erase(inner_cluster_it); @@ -280,7 +274,8 @@ bool DefineBufferClusters::unite_nested_clusters(const BufferClusters::iterator& return false; } -bool DefineBufferClusters::are_buffer_neighbours(const ExpressionPtr& up, const ExpressionPtr& down, ExpressionPtr& loop, size_t& up_idx, size_t& down_idx) { +bool DefineBufferClusters::are_buffer_neighbours(const BufferExpressionPtr& up, const BufferExpressionPtr& down, ExpressionPtr& loop, + size_t& up_idx, size_t& down_idx) { auto find_input = [&down](const PortConnectorPtr& in) { return in->get_source().get_expr() == down; }; @@ -323,15 +318,15 @@ void DefineBufferClusters::parse_memory_access_op(const ExpressionPtr& expr) { // TODO: Some full MemoryAccess ops can have inplace inputs and outputs in general. 
// Need to add mechanism of inplace ports using MemoryAccess::PortDescriptor::inplace for (const auto& input : expr->get_input_port_connectors()) { - if (is_direct_buffer(input->get_source().get_expr(), expr)) { - create_new_cluster(input->get_source().get_expr()); - } + const auto& buffer_expr = ov::as_type_ptr(input->get_source().get_expr()); + if (is_direct_buffer(buffer_expr, expr)) + create_new_cluster(buffer_expr); } for (const auto& output : expr->get_output_port_connectors()) { for (const auto& consumer : output->get_consumers()) { - if (is_direct_buffer(consumer.get_expr(), expr)) { - create_new_cluster(consumer.get_expr()); - } + const auto& buffer_expr = ov::as_type_ptr(consumer.get_expr()); + if (is_direct_buffer(buffer_expr, expr)) + create_new_cluster(buffer_expr); } } } @@ -357,10 +352,8 @@ bool DefineBufferClusters::run(lowered::LinearIR& linear_ir, lowered::LinearIR:: for (size_t cluster_id = 0; cluster_id < m_clusters.size(); ++cluster_id) { const auto& cluster = m_clusters[cluster_id]; - std::for_each(cluster.cbegin(), cluster.cend(), [&cluster_id](const ExpressionPtr& buffer_expr) { - const auto& buffer = ov::as_type_ptr(buffer_expr->get_node()); - OPENVINO_ASSERT(buffer, "Buffer clusters expects Buffer nodes"); - buffer->set_cluster_id(cluster_id); + std::for_each(cluster.cbegin(), cluster.cend(), [&cluster_id](const BufferExpressionPtr& buffer_expr) { + buffer_expr->set_cluster_id(cluster_id); }); } diff --git a/src/common/snippets/src/lowered/pass/init_buffers_default.cpp b/src/common/snippets/src/lowered/pass/init_buffers_default.cpp index e48f833380e5e3..90a7ddf0b3d21c 100644 --- a/src/common/snippets/src/lowered/pass/init_buffers_default.cpp +++ b/src/common/snippets/src/lowered/pass/init_buffers_default.cpp @@ -18,21 +18,17 @@ bool InitBuffersDefault::run(lowered::LinearIR& linear_ir, lowered::LinearIR::co size_t idx = 0; size_t offset = 0; - for (auto expr_it = begin; expr_it != end; ++expr_it) { - const auto& expr = *expr_it; - const 
auto op = expr->get_node(); - if (const auto buffer = ov::as_type_ptr(op)) { - buffer->set_reg_group(idx); - buffer->set_cluster_id(idx); - - if (!buffer->is_defined()) { - buffer->set_offset(utils::get_dynamic_value()); - } else { - buffer->set_offset(offset); - offset += buffer->get_byte_size(); - } - idx++; + for (const auto& buffer_expr : linear_ir.get_buffers()) { + buffer_expr->set_reg_group(idx); + buffer_expr->set_cluster_id(idx); + + if (!buffer_expr->is_defined()) { + buffer_expr->set_offset(utils::get_dynamic_value()); + } else { + buffer_expr->set_offset(offset); + offset += buffer_expr->get_byte_size(); } + idx++; } m_buffer_scratchpad_size = offset; diff --git a/src/common/snippets/src/lowered/pass/init_loops.cpp b/src/common/snippets/src/lowered/pass/init_loops.cpp index 8e9b62d8fab825..9e8873ac6c7fe2 100644 --- a/src/common/snippets/src/lowered/pass/init_loops.cpp +++ b/src/common/snippets/src/lowered/pass/init_loops.cpp @@ -29,11 +29,11 @@ inline void init_is_incremented(LoopPort& port, size_t loop_id) { // Note: LoopPort connected to Buffer between two loops should not be incremented in the outermost loop // Consider the example below: // Store; Loop ids [0,1,2,3] - // IntermediateMemoryBuffer; Loop ids [0,1] + // Buffer; Loop ids [0,1] // Load; Loop ids [0,1,4,5] // Store is output port of Loop-1, but it should be incremented only in Loop-2 and Loop-3. Similar with Load. 
auto is_ignored = [=](const ExpressionPtr& target_expr) { - if (ov::is_type(target_expr->get_node())) { + if (ov::is_type(target_expr)) { const auto& target_loops = target_expr->get_loop_ids(); const auto i_max = std::min(expr_loops.size(), target_loops.size()); for (size_t i = 0; i < i_max && expr_loops[i] == target_loops[i]; i++) { diff --git a/src/common/snippets/src/lowered/pass/insert_buffers.cpp b/src/common/snippets/src/lowered/pass/insert_buffers.cpp index c6b5c3960e025b..fabb6573ab3b14 100644 --- a/src/common/snippets/src/lowered/pass/insert_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/insert_buffers.cpp @@ -115,7 +115,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, // Current expr Loop identifies: 3, 4, 6 // Need to insert between 2nd and 4th Loops - after 2nd Loop const auto pos = insertion_position(linear_ir, loop_manager, parent_expr, expr); - const auto buffer = std::make_shared(parent->output(parent_port)); + const auto buffer = std::make_shared(parent->output(parent_port)); const auto buffer_consumer = has_shape_infer_parent ? 
top_shape_infer_expr->get_input_port(0) : *entry_port; linear_ir.insert_node(buffer, std::vector{ parent_expr_output }, buffer_loop_ids, false, pos, { buffer_consumer }); } @@ -191,7 +191,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, // Note: All potential consumers must have the same count of first equal Loop identifies and the same count of different last identifies const auto pos = insertion_position(linear_ir, loop_manager, expr, consumer_expr); - auto buffer = std::make_shared(node->output(port_idx)); + auto buffer = std::make_shared(node->output(port_idx)); // We cannot insert Node output connector on Buffer output because not all consumers of Node needs Buffer // Example: // Add diff --git a/src/common/snippets/src/lowered/pass/insert_load_store.cpp b/src/common/snippets/src/lowered/pass/insert_load_store.cpp index 231c783849908d..1885738eeb04b3 100644 --- a/src/common/snippets/src/lowered/pass/insert_load_store.cpp +++ b/src/common/snippets/src/lowered/pass/insert_load_store.cpp @@ -76,9 +76,9 @@ bool InsertLoadStore::run(LinearIR& linear_ir, lowered::LinearIR::constExprIt be modified |= insert_load(linear_ir, expr_it); } else if (ov::is_type(node)) { modified |= insert_store(linear_ir, expr_it); - } else if (ov::is_type(node)) { + } else if (ov::is_type(expr)) { modified |= insert_load(linear_ir, expr_it); - if (ov::is_type(node)) + if (expr->get_input_count() > 0) modified |= insert_store(linear_ir, expr_it); } } diff --git a/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp b/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp index badf4b0477759c..1e99f8c845161f 100644 --- a/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp +++ b/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp @@ -32,15 +32,19 @@ void connect_cloned_body_with_buffers_outside(LinearIR::constExprIt cur_begin, L const auto& consumers = original_expr->get_output_port_connector(i)->get_consumers(); for (const 
auto& consumer : consumers) { const auto consumer_expr = consumer.get_expr(); - const auto buffer = ov::as_type_ptr(consumer_expr->get_node()); - if (buffer && std::find(cur_begin, cur_end, consumer.get_expr()) == cur_end) { - OutputVector new_inputs = {result_expr->get_node()->output(i)}; - for (const auto& input : consumer_expr->get_input_port_connectors()) { - const auto& source = input->get_source(); - new_inputs.push_back(source.get_expr()->get_node()->output(source.get_index())); + const auto buffer_expr = ov::as_type_ptr(consumer_expr); + if (buffer_expr && std::find(cur_begin, cur_end, consumer.get_expr()) == cur_end) { + std::vector new_descs = {buffer_expr->get_input_port_descriptor(consumer.get_index())->clone()}; + std::vector new_inputs = {result_expr->get_output_port_connector(i)}; + OutputVector new_op_inputs = {result_expr->get_node()->output(i)}; + for (size_t j = 0; j < buffer_expr->get_input_count(); ++j) { + const auto& source = buffer_expr->get_input_port_connector(j)->get_source(); + new_op_inputs.push_back(source.get_expr()->get_node()->output(source.get_index())); + new_descs.push_back(buffer_expr->get_input_port_descriptor(j)->clone()); + new_inputs.push_back(buffer_expr->get_input_port_connector(j)); } - const auto new_buffer = buffer->clone_with_new_inputs(new_inputs); - linear_ir.replace_with_node({consumer_expr}, new_buffer); + const auto new_buffer_op = buffer_expr->get_node()->clone_with_new_inputs(new_op_inputs); + linear_ir.replace_with_expr({consumer_expr}, buffer_expr->clone_with_new_inputs(new_buffer_op, new_inputs, new_descs)); break; } } diff --git a/src/common/snippets/src/lowered/pass/normalize_buffer_reg_groups.cpp b/src/common/snippets/src/lowered/pass/normalize_buffer_reg_groups.cpp index 3e235749ce7ca2..3431a198f90dc6 100644 --- a/src/common/snippets/src/lowered/pass/normalize_buffer_reg_groups.cpp +++ b/src/common/snippets/src/lowered/pass/normalize_buffer_reg_groups.cpp @@ -18,17 +18,13 @@ bool 
NormalizeBufferRegisterGroups::run(lowered::LinearIR& linear_ir, lowered::L // [ original Buffer reg group -> normalized ] std::map buffer_reg_groups; - for (auto expr_it = begin; expr_it != end; ++expr_it) { - const auto& expr = *expr_it; - const auto op = expr->get_node(); - if (const auto buffer = ov::as_type_ptr(op)) { - const auto group = buffer->get_reg_group(); - if (buffer_reg_groups.count(group) == 0) { - const auto new_id = buffer_reg_groups.size(); - buffer_reg_groups[group] = new_id; - } - buffer->set_reg_group(buffer_reg_groups[group]); + for (const auto& buffer_expr : linear_ir.get_buffers()) { + const auto group = buffer_expr->get_reg_group(); + if (buffer_reg_groups.count(group) == 0) { + const auto new_id = buffer_reg_groups.size(); + buffer_reg_groups[group] = new_id; } + buffer_expr->set_reg_group(buffer_reg_groups[group]); } return buffer_reg_groups.size(); } diff --git a/src/common/snippets/src/lowered/pass/propagate_buffer_offset.cpp b/src/common/snippets/src/lowered/pass/propagate_buffer_offset.cpp index abab05700c2344..4e7d17cf284f89 100644 --- a/src/common/snippets/src/lowered/pass/propagate_buffer_offset.cpp +++ b/src/common/snippets/src/lowered/pass/propagate_buffer_offset.cpp @@ -17,28 +17,24 @@ namespace lowered { namespace pass { -void PropagateBufferOffset::propagate(const ExpressionPtr& buffer_expr) { +void PropagateBufferOffset::propagate(const BufferExpressionPtr& buffer_expr) { // If Buffer has offset We set this offset in the connected MemoryAccess ops // to correctly read and write data because all Buffers have the common data pointer on buffer scratchpad - const auto buffer = ov::as_type_ptr(buffer_expr->get_node()); - OPENVINO_ASSERT(buffer, "Failed to propagate Buffer offset: PropagateBufferOffset expects Buffer op"); - const auto offset = buffer->get_offset(); + const auto offset = buffer_expr->get_offset(); // Propagate to up: in Store. 
Buffer can have only one Store - if (ov::is_type(buffer)) { - for (const auto& input : buffer_expr->get_input_port_connectors()) { - const auto& parent_output = input->get_source(); - const auto& parent_expr = parent_output.get_expr(); - const auto port = parent_output.get_index(); - const auto& parent_node = parent_expr->get_node(); - auto memory_access = std::dynamic_pointer_cast(parent_node); - if (memory_access && memory_access->is_memory_access_output_port(port)) { - memory_access->set_output_offset(offset, port); - } else { - OPENVINO_THROW( - "PropagateBufferOffset didn't find the connected MemoryAccess op to Buffer for offset propagation"); - } + for (const auto& input : buffer_expr->get_input_port_connectors()) { + const auto& parent_output = input->get_source(); + const auto& parent_expr = parent_output.get_expr(); + const auto port = parent_output.get_index(); + const auto& parent_node = parent_expr->get_node(); + auto memory_access = std::dynamic_pointer_cast(parent_node); + if (memory_access && memory_access->is_memory_access_output_port(port)) { + memory_access->set_output_offset(offset, port); + } else { + OPENVINO_THROW( + "PropagateBufferOffset didn't find the connected MemoryAccess op to Buffer for offset propagation"); } } // Propagate to down: in Load. 
Buffer can have several Load @@ -65,10 +61,8 @@ void PropagateBufferOffset::propagate(const ExpressionPtr& buffer_expr) { bool PropagateBufferOffset::run(lowered::LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::PropagateBufferOffset"); - const auto& buffer_expressions = linear_ir.get_buffers(); - for (const auto& buffer_expr : buffer_expressions) { + for (const auto& buffer_expr : linear_ir.get_buffers()) propagate(buffer_expr); - } return true; } diff --git a/src/common/snippets/src/lowered/pass/set_buffer_reg_group.cpp b/src/common/snippets/src/lowered/pass/set_buffer_reg_group.cpp index 59c9bf21a0894a..76ece34e844618 100644 --- a/src/common/snippets/src/lowered/pass/set_buffer_reg_group.cpp +++ b/src/common/snippets/src/lowered/pass/set_buffer_reg_group.cpp @@ -28,7 +28,7 @@ bool operator!=(const SetBufferRegGroup::ShiftPtrParams& lhs, const SetBufferReg return !(rhs == lhs); } -size_t SetBufferRegGroup::get_buffer_idx(const ExpressionPtr& target, const BufferPool& pool) { +size_t SetBufferRegGroup::get_buffer_idx(const BufferExpressionPtr& target, const BufferPool& pool) { const auto iter = std::find(pool.cbegin(), pool.cend(), target); OPENVINO_ASSERT(iter != pool.cend(), "Buffer wasn't find in Buffer system of Subgraph"); return std::distance(pool.cbegin(), iter); @@ -44,8 +44,8 @@ bool SetBufferRegGroup::can_be_in_one_group(const ShiftPtrParams& lhs, const Shi return are_static && equal_ptr_params_shifting && (equal_element_type_sizes || (lhs.ptr_increment == 0 && lhs.finalization_offset == 0)); } -bool SetBufferRegGroup::are_adjacent(const std::pair& lhs, - const std::pair& rhs) { +bool SetBufferRegGroup::are_adjacent(const std::pair& lhs, + const std::pair& rhs) { const auto& lhs_ids = lhs.first->get_loop_ids(); const auto& rhs_ids = rhs.first->get_loop_ids(); const auto equal_loop_ids = lhs_ids == rhs_ids; @@ -64,10 +64,10 @@ bool SetBufferRegGroup::are_adjacent(const std::pair& lhs, - const std::pair& rhs, 
- const BufferPool& buffers, - std::vector& adj) { +void SetBufferRegGroup::update_adj_matrix(const std::pair& lhs, + const std::pair& rhs, + const BufferPool& buffers, + std::vector& adj) { const auto size = buffers.size(); const auto lhs_idx = get_buffer_idx(lhs.first, buffers); const auto rhs_idx = get_buffer_idx(rhs.first, buffers); @@ -125,14 +125,14 @@ SetBufferRegGroup::BufferMap SetBufferRegGroup::get_buffer_loop_neighbours(const BufferMap buffer_neighbours; for (size_t i = 0; i < input_count; ++i) { const auto& parent_output = loop_end_expr->get_input_port_connector(i)->get_source().get_expr(); - if (ov::is_type(parent_output->get_node())) { - if (buffer_neighbours.count(parent_output) > 0) { - OPENVINO_ASSERT(buffer_neighbours[parent_output].ptr_increment == ptr_increments[i] && - buffer_neighbours[parent_output].finalization_offset == finalization_offsets[i], + if (const auto buffer_expr = ov::as_type_ptr(parent_output)) { + if (buffer_neighbours.count(buffer_expr) > 0) { + OPENVINO_ASSERT(buffer_neighbours[buffer_expr].ptr_increment == ptr_increments[i] && + buffer_neighbours[buffer_expr].finalization_offset == finalization_offsets[i], "Invalid data pointer shifts: If Buffer has several consumers, this consumers must have the same shifts or zero"); continue; } - buffer_neighbours[parent_output] = { data_sizes[i], ptr_increments[i], finalization_offsets[i] }; + buffer_neighbours[buffer_expr] = { data_sizes[i], ptr_increments[i], finalization_offsets[i] }; } } for (size_t i = input_count; i < input_count + output_count; ++i) { @@ -142,8 +142,8 @@ SetBufferRegGroup::BufferMap SetBufferRegGroup::get_buffer_loop_neighbours(const size_t loop_count = 0; for (const auto& consumer_input : consumer_inputs) { const auto& child_expr = consumer_input.get_expr(); - if (ov::is_type(child_expr->get_node())) { - buffer_neighbours[child_expr] = { data_sizes[i], ptr_increments[i], finalization_offsets[i] }; + if (const auto buffer_expr = ov::as_type_ptr(child_expr)) { + 
buffer_neighbours[buffer_expr] = { data_sizes[i], ptr_increments[i], finalization_offsets[i] }; buffer_count++; } else if (ov::is_type(child_expr->get_node())) { loop_count++; @@ -163,34 +163,41 @@ SetBufferRegGroup::BufferMap SetBufferRegGroup::get_buffer_loop_inside(const Lin BufferMap inner_buffers; for (auto it = std::reverse_iterator(loop_end_it); (*it)->get_node() != loop_begin; ++it) { const auto& inner_expr = *it; - if (ov::is_type(inner_expr->get_node())) { + if (const auto buffer_expr = ov::as_type_ptr(inner_expr)) { // Set default zero values since it's not used for adjacency definition in case with Buffers in Loop - if (inner_buffers.count(inner_expr) == 0) - inner_buffers[inner_expr] = { 0, 0, 0 }; + if (inner_buffers.count(buffer_expr) == 0) + inner_buffers[buffer_expr] = { 0, 0, 0 }; } } return inner_buffers; } auto SetBufferRegGroup::coloring(BufferPool& buffers, std::vector& adj) -> std::map { + auto get_buffer_it = [&](size_t index) { + OPENVINO_ASSERT(index < buffers.size(), "Incorrect index"); + BufferPool::iterator it = buffers.begin(); + std::advance(it, index); + return it; + }; size_t color = 0; std::map color_groups; const auto size = buffers.size(); for (size_t i = 0; i < size; i++) { + auto& buffer_i = *get_buffer_it(i); // The Buffer is already colored (visited) - skip - if (!buffers[i]) + if (!buffer_i) continue; - const auto& buffer = buffers[i]; - color_groups[color].push_back(buffer); // Add to Color Group - buffers[i] = nullptr; // Remove from graph vertices + color_groups[color].push_back(buffer_i); // Add to Color Group + buffer_i = nullptr; // Remove from graph vertices // While Buffer `i` has non-coloured non-neighbours (while row `i` contains 0) - while (!std::accumulate(adj.begin() + i * size, adj.begin() + (i + 1) * size, true, std::logical_and())) { + while ((i + 1 < size) && !std::accumulate(adj.begin() + i * size, adj.begin() + (i + 1) * size, true, std::logical_and())) { size_t j = i + 1; + auto buffer_j_it = 
get_buffer_it(j); // Find first non-adjacent and non-visited (non-colored) Buffer to color him to the same color - for (; j < size; ++j) { - if (!adj[index(size, i, j)] && buffers[j]) + for (; j < size; ++j, ++buffer_j_it) { + if (!adj[index(size, i, j)] && *buffer_j_it) break; } @@ -199,9 +206,10 @@ auto SetBufferRegGroup::coloring(BufferPool& buffers, std::vector& adj) -> if (j == size) break; - const auto& neighbour_buffer = buffers[j]; + auto& buffer_j = *buffer_j_it; + const auto& neighbour_buffer = buffer_j; color_groups[color].push_back(neighbour_buffer); // Add to Color Group - buffers[j] = nullptr; // Remove from graph vertices + buffer_j = nullptr; // Remove from graph vertices // Unite adjacency links: // All the neighbors of Buffer `j` are added to the neighbors of Buffer `i` (the `vertices` are pulled together). // The result is an updated i-th row of the adjacency matrix, @@ -220,14 +228,7 @@ auto SetBufferRegGroup::coloring(BufferPool& buffers, std::vector& adj) -> bool SetBufferRegGroup::run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::SetBufferRegGroup") // Identify Buffers using Graph coloring algorithm. 
- BufferPool buffer_pool; - - for (auto expr_it = begin; expr_it != end; ++expr_it) { - const auto& expr = *expr_it; - if (ov::is_type(expr->get_node())) { - buffer_pool.push_back(expr); - } - } + BufferPool buffer_pool = linear_ir.get_buffers(); // Creation of Adj matrix auto adj = create_adjacency_matrix(begin, end, buffer_pool); @@ -238,9 +239,8 @@ bool SetBufferRegGroup::run(LinearIR& linear_ir, lowered::LinearIR::constExprIt for (const auto& pair : color_groups) { const auto color = pair.first; const auto& united_buffers = pair.second; - for (const auto& buffer_expr : united_buffers) { - ov::as_type_ptr(buffer_expr->get_node())->set_reg_group(color); - } + for (const auto& buffer_expr : united_buffers) + buffer_expr->set_reg_group(color); } return true; diff --git a/src/common/snippets/src/lowered/pass/solve_buffer_memory.cpp b/src/common/snippets/src/lowered/pass/solve_buffer_memory.cpp index 2a6b68738f7a68..ca85cefd369099 100644 --- a/src/common/snippets/src/lowered/pass/solve_buffer_memory.cpp +++ b/src/common/snippets/src/lowered/pass/solve_buffer_memory.cpp @@ -28,22 +28,17 @@ std::map create_execution_number_mapping(const LinearIR& linear_ir) } } // namespace -std::pair SolveBufferMemory::extract_static_and_dynamic_buffers(const LinearIR::container& buffer_expressions) { - LinearIR::container static_buffer_exprs, dynamic_buffer_exprs; +std::pair SolveBufferMemory::extract_static_and_dynamic_buffers(const Buffers& buffer_expressions) { + Buffers static_buffer_exprs, dynamic_buffer_exprs; for (const auto& buffer_expr : buffer_expressions) { - const auto& buffer = ov::as_type_ptr(buffer_expr->get_node()); - OPENVINO_ASSERT(buffer, "Buffer clusters expects Buffer nodes"); - - auto& clusters = buffer->is_defined() ? static_buffer_exprs : dynamic_buffer_exprs; + auto& clusters = buffer_expr->is_defined() ? static_buffer_exprs : dynamic_buffer_exprs; clusters.push_back(buffer_expr); } // Validation check that buffer cluster has only static or dynamic buffers. 
for (const auto& static_buffer : static_buffer_exprs) { - const auto static_cluster_id = ov::as_type_ptr(static_buffer->get_node())->get_cluster_id(); - auto is_cluster_ids_the_same = [&static_cluster_id](const ExpressionPtr& expr) { - return static_cluster_id == ov::as_type_ptr(expr->get_node())->get_cluster_id(); - }; + const auto static_cluster_id = static_buffer->get_cluster_id(); + auto is_cluster_ids_the_same = [&static_cluster_id](const BufferExpressionPtr& expr) { return static_cluster_id == expr->get_cluster_id(); }; OPENVINO_ASSERT(std::none_of(dynamic_buffer_exprs.cbegin(), dynamic_buffer_exprs.cend(), is_cluster_ids_the_same), "There is Buffer cluster with buffers which has defined and undefined allocation sizes"); } @@ -51,7 +46,7 @@ std::pair SolveBufferMemory::extract_s return { static_buffer_exprs, dynamic_buffer_exprs }; } -std::vector SolveBufferMemory::init_boxes(const LinearIR::container& buffer_expressions, const LinearIR& linear_ir) { +std::vector SolveBufferMemory::init_boxes(const Buffers& buffer_expressions, const LinearIR& linear_ir) { // ov::MemorySolver interface requires integer execution numbers (lifetime must be integer). 
// To align with ov::MemorySolver interface, we create the map [double -> integer] const auto int_execution_numbers = create_execution_number_mapping(linear_ir); @@ -63,9 +58,7 @@ std::vector SolveBufferMemory::init_boxes(const LinearIR: std::map map_boxes; for (const auto& buffer_expr : buffer_expressions) { - const auto& buffer = ov::as_type_ptr(buffer_expr->get_node()); - OPENVINO_ASSERT(buffer, "Buffer clusters expects Buffer nodes"); - auto cluster_id = static_cast(buffer->get_cluster_id()); + auto cluster_id = static_cast(buffer_expr->get_cluster_id()); if (map_boxes.count(cluster_id) == 0) { map_boxes[cluster_id] = { std::numeric_limits::max(), 0, 0, cluster_id }; @@ -98,7 +91,7 @@ std::vector SolveBufferMemory::init_boxes(const LinearIR: } OPENVINO_ASSERT(e_start <= e_finish, "Incorrect life time of buffer!"); - auto buffer_size = static_cast(buffer->get_byte_size()); + auto buffer_size = static_cast(buffer_expr->get_byte_size()); box.size = std::max(buffer_size, box.size); box.start = std::min(e_start, box.start); @@ -119,7 +112,7 @@ std::vector SolveBufferMemory::init_boxes(const LinearIR: return boxes; } -void SolveBufferMemory::solve_static_buffer_memory(const LinearIR::container& static_buffer_expressions, const LinearIR& linear_ir) { +void SolveBufferMemory::solve_static_buffer_memory(const Buffers& static_buffer_expressions, const LinearIR& linear_ir) { const auto boxes = init_boxes(static_buffer_expressions, linear_ir); ov::MemorySolver memSolver(boxes); @@ -127,37 +120,28 @@ void SolveBufferMemory::solve_static_buffer_memory(const LinearIR::container& st // Set offsets for Buffers for (const auto& buffer_expr : static_buffer_expressions) { - const auto& buffer = ov::as_type_ptr(buffer_expr->get_node()); - OPENVINO_ASSERT(buffer, "Buffer clusters expects Buffer nodes"); - - const auto offset = static_cast(memSolver.get_offset(static_cast(buffer->get_cluster_id()))); - buffer->set_offset(offset * m_alignment); // alignment in byte + const auto offset 
= static_cast(memSolver.get_offset(static_cast(buffer_expr->get_cluster_id()))); + buffer_expr->set_offset(offset * m_alignment); // alignment in byte } } -void SolveBufferMemory::set_dynamic_buffer_offset(const LinearIR::container& dynamic_buffer_expressions) { +void SolveBufferMemory::set_dynamic_buffer_offset(const Buffers& dynamic_buffer_expressions) { size_t offset = utils::get_dynamic_value(); // If there are not allocated memory for static buffers in LinearIR and there is only one cluster of dynamic buffer exprs, // we can force offset = 0 if (m_static_buffer_scratchpad_size == 0) { std::set dynamic_clusters; - for (const auto& dynamic_buffer_expr : dynamic_buffer_expressions) { - const auto& buffer = ov::as_type_ptr(dynamic_buffer_expr->get_node()); - OPENVINO_ASSERT(buffer, "Buffer clusters expects Buffer nodes"); - dynamic_clusters.insert(buffer->get_cluster_id()); - } + for (const auto& dynamic_buffer_expr : dynamic_buffer_expressions) + dynamic_clusters.insert(dynamic_buffer_expr->get_cluster_id()); + if (dynamic_clusters.size() == 1) offset = 0; } // Set offsets for Buffers - for (const auto& buffer_expr : dynamic_buffer_expressions) { - const auto& buffer = ov::as_type_ptr(buffer_expr->get_node()); - OPENVINO_ASSERT(buffer, "Buffer clusters expects Buffer nodes"); - - buffer->set_offset(offset); - } + for (const auto& buffer_expr : dynamic_buffer_expressions) + buffer_expr->set_offset(offset); } bool SolveBufferMemory::run(LinearIR& linear_ir) { @@ -165,7 +149,7 @@ bool SolveBufferMemory::run(LinearIR& linear_ir) { // TODO [143395] : MemoryManager will be able to return two containers with dynamic and static buffers // without additional `extract` functions in all passes - LinearIR::container static_buffer_exprs, dynamic_buffer_exprs; + Buffers static_buffer_exprs, dynamic_buffer_exprs; std::tie(static_buffer_exprs, dynamic_buffer_exprs) = extract_static_and_dynamic_buffers(linear_ir.get_buffers()); if (!static_buffer_exprs.empty()) diff --git 
a/src/common/snippets/src/lowered/pass/validate.cpp b/src/common/snippets/src/lowered/pass/validate.cpp index 24fff8ab0fc00b..2e9e5813c03264 100644 --- a/src/common/snippets/src/lowered/pass/validate.cpp +++ b/src/common/snippets/src/lowered/pass/validate.cpp @@ -64,10 +64,12 @@ void validate_result(const ExpressionPtr& expr, const LinearIR& linear_ir) { void validate_buffer(const ExpressionPtr& expr, const LinearIR& linear_ir) { OPENVINO_ASSERT(ov::is_type(expr->get_node()), "Buffer validation expects Buffer op"); + OPENVINO_ASSERT(ov::is_type(expr), + "Buffer validation expects Buffer expression"); for (const auto& input : expr->get_input_port_connectors()) { const auto& source = input->get_source(); const auto ma = std::dynamic_pointer_cast(source.get_expr()->get_node()); - OPENVINO_ASSERT(ma && ma->is_memory_access_input_port(source.get_index()), + OPENVINO_ASSERT(ma && ma->is_memory_access_output_port(source.get_index()), "Buffer expects MemoryAccess parent"); const auto buffer_siblings = input->get_consumers(); for (const auto& buffer_sibling : buffer_siblings) { @@ -124,39 +126,6 @@ void validate_loop_end(const ExpressionPtr& expr, const LinearIR& linear_ir) { validate_loop_ports(input_port_infos); validate_loop_ports(output_port_infos, loop_end->get_input_num()); } - -// TODO [143395] : Extract this validation checks to the separate `ValidateBuffers` pass -void validate_buffer_expressions(const LinearIR::container& buffer_expressions) { - std::set cluster_ids; - std::map> dynamic_buffer_clusters, static_buffer_clusters; - - for (const auto& buffer_expr : buffer_expressions) { - const auto buffer = ov::as_type_ptr(buffer_expr->get_node()); - OPENVINO_ASSERT(buffer, "Expected Buffer ops in Buffer expressions of LinearIR"); - - // TODO [143395] : MemoryManager should provide exact containers with needed buffers (static or dynamic) without any `is_defined()` - auto& clusters = buffer->is_defined() ? 
static_buffer_clusters : dynamic_buffer_clusters; - clusters[buffer->get_cluster_id()].insert(buffer_expr); - cluster_ids.insert(buffer->get_cluster_id()); - } - - OPENVINO_ASSERT(cluster_ids.size() == dynamic_buffer_clusters.size() + static_buffer_clusters.size(), "Incorrect count of Buffer clusters"); - OPENVINO_ASSERT(cluster_ids.empty() || (*cluster_ids.cbegin() == 0 && *cluster_ids.crbegin() == (cluster_ids.size() - 1)), - "Incorrect indetifiers of Buffer clusters"); - - for (const auto& p : static_buffer_clusters) { - const auto& cluster_id = p.first; - const auto& cluster = p.second; - OPENVINO_ASSERT(dynamic_buffer_clusters.count(cluster_id) == 0, "Buffers from the same cluster must be only static or dynamic"); - - OPENVINO_ASSERT(cluster.size() > 0, "Incorrect size of buffer cluster"); - size_t cluster_offset = ov::as_type_ptr((*cluster.cbegin())->get_node())->get_offset(); - for (const auto& buffer_expr : cluster) { - OPENVINO_ASSERT(cluster_offset == ov::as_type_ptr(buffer_expr->get_node())->get_offset(), - "Static Buffers from the same cluster must have the same offset!"); - } - } -} } // namespace Validate::Validate() { @@ -188,8 +157,6 @@ bool Validate::run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lo prev_exec_order = expr->get_exec_num(); } - validate_buffer_expressions(linear_ir.get_buffers()); - return false; } diff --git a/src/common/snippets/src/lowered/pass/validate_buffers.cpp b/src/common/snippets/src/lowered/pass/validate_buffers.cpp new file mode 100644 index 00000000000000..c5100f42333ede --- /dev/null +++ b/src/common/snippets/src/lowered/pass/validate_buffers.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/lowered/pass/validate_buffers.hpp" + +#include "snippets/utils/utils.hpp" +#include "snippets/itt.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +bool ValidateBuffers::run(LinearIR& linear_ir, 
lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) { + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ValidateBuffers") + + const auto& lir_buffers = linear_ir.get_buffers(); + + // Firstly we check that all BufferExpression are in "get_buffers()" + for (const auto& expr : linear_ir) { + if (const auto& buffer_expr = ov::as_type_ptr(expr)) + OPENVINO_ASSERT(std::find(lir_buffers.cbegin(), lir_buffers.cend(), buffer_expr) != lir_buffers.cend(), + "All BufferExpressions must be in LinearIR.get_buffers()"); + } + + // Secondly we should validate `lir_buffers`: + // - execution order + // - clusters + + std::set cluster_ids; + std::map> dynamic_buffer_clusters, static_buffer_clusters; + + double prev_exec_order = -1 * std::numeric_limits::max(); + for (const auto& buffer_expr : lir_buffers) { + // TODO [143395] : MemoryManager should provide exact containers with needed buffers (static or dynamic) without any `is_defined()` + auto& clusters = buffer_expr->is_defined() ? 
static_buffer_clusters : dynamic_buffer_clusters; + clusters[buffer_expr->get_cluster_id()].insert(buffer_expr); + cluster_ids.insert(buffer_expr->get_cluster_id()); + + OPENVINO_ASSERT(buffer_expr->get_exec_num() > prev_exec_order, "Invalid execution order of buffer expressions"); + prev_exec_order = buffer_expr->get_exec_num(); + buffer_expr->validate(); + } + + OPENVINO_ASSERT(cluster_ids.size() == dynamic_buffer_clusters.size() + static_buffer_clusters.size(), "Incorrect count of Buffer clusters"); + OPENVINO_ASSERT(cluster_ids.empty() || (*cluster_ids.cbegin() == 0 && *cluster_ids.crbegin() == (cluster_ids.size() - 1)), + "Incorrect identifiers of Buffer clusters"); + + for (const auto& p : static_buffer_clusters) { + const auto& cluster_id = p.first; + const auto& cluster = p.second; + OPENVINO_ASSERT(dynamic_buffer_clusters.count(cluster_id) == 0, "Buffers from the same cluster must be only static or dynamic"); + + OPENVINO_ASSERT(cluster.size() > 0, "Incorrect size of buffer cluster"); + size_t cluster_offset = (*cluster.cbegin())->get_offset(); + for (const auto& buffer_expr : cluster) { + OPENVINO_ASSERT(cluster_offset == buffer_expr->get_offset(), "Static Buffers from the same cluster must have the same offset!"); + } + } + + return !lir_buffers.empty(); +} + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/src/op/buffer.cpp b/src/common/snippets/src/op/buffer.cpp index 0c7403cd56f6f5..0c13c12ee8c32d 100644 --- a/src/common/snippets/src/op/buffer.cpp +++ b/src/common/snippets/src/op/buffer.cpp @@ -13,89 +13,83 @@ namespace ov { namespace snippets { namespace op { -Buffer::Buffer(const OutputVector& arguments, size_t allocation_size, size_t reg_group, size_t cluster_id) - : Op(arguments), m_allocation_size(allocation_size), m_reg_group(reg_group), m_cluster_id(cluster_id), m_offset(0) { - constructor_validate_and_infer_types(); -} - -bool Buffer::visit_attributes(AttributeVisitor&
visitor) { - INTERNAL_OP_SCOPE(Buffer_visit_attributes); - auto element_type = get_element_type(); - auto allocation_size = utils::value2str(m_allocation_size); - auto offset = utils::value2str(m_offset); - visitor.on_attribute("allocation_size", allocation_size); - visitor.on_attribute("offset", offset); - visitor.on_attribute("reg_group", m_reg_group); - visitor.on_attribute("cluster_id", m_cluster_id); - visitor.on_attribute("element_type", element_type); - return true; -} - -bool Buffer::is_defined() const { - return !utils::is_dynamic_value(m_allocation_size); -} - -size_t Buffer::get_byte_size() const { - if (is_defined()) - return m_allocation_size * get_element_type().size(); - return utils::get_dynamic_value(); -} +Buffer::Buffer(const ov::Output& arg) : Buffer(ov::OutputVector{arg}) {} -IntermediateMemoryBuffer::IntermediateMemoryBuffer(const OutputVector& arguments, size_t allocation_size, size_t reg_group, size_t cluster_id) - : Buffer(arguments, allocation_size, reg_group, cluster_id) { +Buffer::Buffer(const OutputVector& arguments) : Op(arguments), m_type(Type::IntermediateMemory) { constructor_validate_and_infer_types(); } -IntermediateMemoryBuffer::IntermediateMemoryBuffer(const ov::Output& arg, size_t allocation_size, size_t reg_group, size_t cluster_id) - : IntermediateMemoryBuffer(OutputVector{arg}, allocation_size, reg_group, cluster_id) {} - -void IntermediateMemoryBuffer::validate_and_infer_types() { - INTERNAL_OP_SCOPE(Buffer_validate_and_infer_types); - ov::PartialShape output_shape; - set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); -} - -std::shared_ptr IntermediateMemoryBuffer::clone_with_new_inputs(const OutputVector& new_args) const { - INTERNAL_OP_SCOPE(Buffer_clone_with_new_inputs); - auto new_buffer = std::make_shared(new_args, m_allocation_size, m_reg_group, m_cluster_id); - new_buffer->set_offset(m_offset); - return new_buffer; +Buffer::Buffer(const ov::Shape& shape, ov::element::Type element_type) : Op(), 
m_type(Type::NewMemory), m_output_shape(shape), m_element_type(element_type) { + constructor_validate_and_infer_types(); } -NewMemoryBuffer::NewMemoryBuffer(const ov::Shape& shape, size_t reg_group, size_t cluster_id, ov::element::Type element_type) - : Buffer({}, ov::shape_size(shape), reg_group, cluster_id), m_output_shape(shape), m_element_type(element_type) { - constructor_validate_and_infer_types(); +bool Buffer::visit_attributes(AttributeVisitor& visitor) { + INTERNAL_OP_SCOPE(Buffer_visit_attributes); + auto shape = utils::pshape_to_vdims(get_output_partial_shape(0)); + auto etype = get_output_element_type(0); + visitor.on_attribute("shape", shape); + visitor.on_attribute("element_type", etype); + return true; } -void NewMemoryBuffer::validate_and_infer_types() { +void Buffer::validate_and_infer_types() { INTERNAL_OP_SCOPE(Buffer_validate_and_infer_types); - OPENVINO_ASSERT(get_input_size() == 0, "Buffer with new allocated memory mustn't have arguments!"); - set_output_type(0, m_element_type, m_output_shape); + if (m_type == Type::NewMemory) { + OPENVINO_ASSERT(get_input_size() == 0, "NewMemory Buffer mustn't have inputs"); + set_output_type(0, m_element_type, m_output_shape); + } else if (m_type == Type::IntermediateMemory) { + OPENVINO_ASSERT(get_input_size() != 0, "IntermediateMemory Buffer must have inputs"); + const auto inputs = input_values(); + const auto inshape = get_input_partial_shape(0); + const auto intype = get_input_element_type(0); + OPENVINO_ASSERT(std::all_of(inputs.cbegin() + 1, inputs.cend(), + [&](const ov::Output& in) { return in.get_partial_shape() == inshape && in.get_element_type() == intype; }), + "All inputs of Buffers must have the same shape and element type"); + set_output_type(0, intype, inshape); + } else { + OPENVINO_THROW("Unknown Buffer type"); + } } -std::shared_ptr NewMemoryBuffer::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Buffer::clone_with_new_inputs(const OutputVector& new_args) 
const { INTERNAL_OP_SCOPE(Buffer_clone_with_new_inputs); - check_new_args_count(this, new_args); - auto new_buffer = std::make_shared(m_output_shape, m_reg_group, m_cluster_id, m_element_type); - new_buffer->set_offset(m_offset); - return new_buffer; + if (m_type == Type::NewMemory) { + OPENVINO_ASSERT(new_args.empty(), "NewMemory Buffer mustn't have inputs"); + return std::make_shared(m_output_shape, m_element_type); + } else if (m_type == Type::IntermediateMemory) { + return std::make_shared(new_args); + } else { + OPENVINO_THROW("Unknown Buffer type"); + } } -void NewMemoryBuffer::set_element_type(ov::element::Type element_type) { - m_element_type = std::move(element_type); - // Apply the change - validate_and_infer_types(); +size_t Buffer::get_allocation_size() const { + if (m_type == Type::NewMemory) { + const auto pshape = get_output_partial_shape(0); + OPENVINO_ASSERT(pshape.is_static(), "If Buffer doesn't have source - output shape must be static"); + return ov::shape_size(pshape.get_shape()); + } + return utils::get_dynamic_value(); } -NewMemoryBuffer::ShapeInfer::ShapeInfer(const std::shared_ptr& n) { - const auto& buffer = ov::as_type_ptr(n); - OPENVINO_ASSERT(buffer, "Got invalid node in NewMemoryBuffer::ShapeInfer"); - m_shape = buffer->get_shape(); +Buffer::ShapeInfer::ShapeInfer(const std::shared_ptr& n) { + const auto& buffer = ov::as_type_ptr(n); + OPENVINO_ASSERT(buffer, "Got invalid node in Buffer::ShapeInfer"); + m_type = buffer->m_type; + OPENVINO_ASSERT(utils::one_of(m_type, Type::IntermediateMemory, Type::NewMemory), "Got invalid Buffer type"); + if (m_type == Type::NewMemory) + m_shape = buffer->m_output_shape; } -IShapeInferSnippets::Result NewMemoryBuffer::ShapeInfer::infer(const std::vector& input_shapes) { - OPENVINO_ASSERT(input_shapes.empty(), "NewMemoryBuffer shape inference mustn't have input shapes"); - return {{m_shape}, ShapeInferStatus::success}; +IShapeInferSnippets::Result Buffer::ShapeInfer::infer(const std::vector& 
input_shapes) { + if (m_type == Type::NewMemory) { + OPENVINO_ASSERT(input_shapes.empty(), "NewMemoryBuffer shape inference mustn't have input shapes"); + return {{m_shape}, ShapeInferStatus::success}; + } else if (m_type == Type::IntermediateMemory) { + OPENVINO_ASSERT(!input_shapes.empty(), "IntermediateMemoryBuffer shape inference must have input shapes"); + return {{input_shapes[0].get()}, ShapeInferStatus::success}; + } + OPENVINO_THROW("Unknown Buffer type!"); } } // namespace op diff --git a/src/common/snippets/src/op/serialization_node.cpp b/src/common/snippets/src/op/serialization_node.cpp index c136acea975a42..1718f770ad62d6 100644 --- a/src/common/snippets/src/op/serialization_node.cpp +++ b/src/common/snippets/src/op/serialization_node.cpp @@ -40,83 +40,7 @@ std::shared_ptr SerializationNode::clone_with_new_inputs(const OutputVecto } bool SerializationNode::visit_attributes(AttributeVisitor &visitor) { - auto is_planar_layout = [](const std::vector& layout) { - for (size_t i = 0; i < layout.size(); ++i) - if (layout[i] != i) return false; - return true; - }; - auto subtensor2str = [](const VectorDims& subtensor) { - std::stringstream ss; - for (size_t i = 0; i < subtensor.size(); ++i) { - const auto& v = subtensor[i]; - const auto v_str = utils::is_full_dim_value(v) ? "FULL_DIM" : - utils::is_dynamic_value(v) ? "?" : std::to_string(v); - const auto del = i < subtensor.size() - 1 ?
", " : ""; - ss << v_str << del; - } - return ss.str(); - }; - - std::vector in_regs, out_regs; - std::vector in_reg_types, out_reg_types; - std::vector> shapes; - std::vector> subtensors; - std::vector>> layouts; - for (size_t i = 0; i < m_expr->get_input_count(); i++) { - const auto& desc = m_expr->get_input_port_descriptor(i); - const auto& shape = desc->get_shape(); - if (!shape.empty()) - shapes.emplace_back("in_shape_" + std::to_string(i), ov::PartialShape(shape)); - - const auto& subtensor = desc->get_subtensor(); - if (!subtensor.empty()) - subtensors.emplace_back("in_subtensor_" + std::to_string(i), subtensor2str(subtensor)); - - const auto& layout = desc->get_layout(); - if (!layout.empty() && !is_planar_layout(layout)) - layouts.emplace_back("in_layout_" + std::to_string(i), layout); - - in_reg_types.emplace_back(regTypeToStr(desc->get_reg().type)); - in_regs.emplace_back(desc->get_reg().idx); - } - for (size_t i = 0; i < m_expr->get_output_count(); i++) { - const auto& desc = m_expr->get_output_port_descriptor(i); - const auto& shape = desc->get_shape(); - if (!shape.empty()) - shapes.emplace_back("out_shape_" + std::to_string(i), ov::PartialShape(shape)); - - const auto& subtensor = desc->get_subtensor(); - if (!subtensor.empty()) - subtensors.emplace_back("out_subtensor_" + std::to_string(i), subtensor2str(subtensor)); - - const auto& layout = desc->get_layout(); - if (!layout.empty() && !is_planar_layout(layout)) - layouts.emplace_back("out_layout_" + std::to_string(i), layout); - - out_reg_types.emplace_back(regTypeToStr(desc->get_reg().type)); - out_regs.emplace_back(desc->get_reg().idx); - } - - if (!in_regs.empty()) { - visitor.on_attribute("in_regs", in_regs); - visitor.on_attribute("in_reg_types", in_reg_types); - } - if (!out_regs.empty()) { - visitor.on_attribute("out_regs", out_regs); - visitor.on_attribute("out_reg_types", out_reg_types); - } - for (auto& s : shapes) - visitor.on_attribute(s.first, s.second); - for (auto& s : subtensors) - 
visitor.on_attribute(s.first, s.second); - for (auto& s : layouts) - visitor.on_attribute(s.first, s.second); - auto loop_ids = m_expr->get_loop_ids(); - visitor.on_attribute("loop_ids", loop_ids); - auto exec_num = m_expr->get_exec_num(); - visitor.on_attribute("execution_number", exec_num); - m_expr->get_node()->visit_attributes(visitor); - return true; + return m_expr->visit_attributes(visitor); } } // namespace op diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index cf9f6b3121782e..0690494220171a 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -44,6 +44,7 @@ #include "snippets/lowered/pass/optimize_domain.hpp" #include "snippets/lowered/pass/insert_perf_count.hpp" #include "snippets/lowered/pass/validate_shapes.hpp" +#include "snippets/lowered/pass/validate_buffers.hpp" #include "snippets/lowered/pass/validate.hpp" #include "snippets/lowered/pass/pass_config.hpp" #include "snippets/lowered/pass/reduce_decomposition.hpp" @@ -472,6 +473,7 @@ void Subgraph::control_flow_transformations(size_t min_parallel_work_amount, siz pipeline.register_pass(m_linear_ir->get_config().m_are_buffers_optimized); pipeline.register_pass(); pipeline.register_positioned_passes(lowered_backend_passes); + pipeline.register_pass(); // must be last pipeline.register_pass(); // must be last pipeline.run(*m_linear_ir); diff --git a/src/common/snippets/src/runtime_configurator.cpp b/src/common/snippets/src/runtime_configurator.cpp index 552455b89f5529..a2b288eabde14e 100644 --- a/src/common/snippets/src/runtime_configurator.cpp +++ b/src/common/snippets/src/runtime_configurator.cpp @@ -154,17 +154,14 @@ void RuntimeConfigurator::init_data_info(const lowered::LinearIRCPtr& linear_ir) } void RuntimeConfigurator::init_buffer_info(const lowered::LinearIRCPtr& linear_ir) { - std::map> dynamic_buffer_clusters, static_buffer_clusters; + std::map> dynamic_buffer_clusters, static_buffer_clusters; // 
All needed checks are in Validate pass const auto& buffer_expressions = linear_ir->get_buffers(); for (const auto& buffer_expr : buffer_expressions) { - const auto buffer = ov::as_type_ptr(buffer_expr->get_node()); - OPENVINO_ASSERT(buffer, "Expected Buffer ops in Buffer expressions of LinearIR"); - // TODO [143395] : MemoryManager should provide exact containers with needed buffers (static or dynamic) without any `is_defined()` - auto& clusters = buffer->is_defined() ? static_buffer_clusters : dynamic_buffer_clusters; - clusters[buffer->get_cluster_id()].insert(buffer_expr); + auto& clusters = buffer_expr->is_defined() ? static_buffer_clusters : dynamic_buffer_clusters; + clusters[buffer_expr->get_cluster_id()].insert(buffer_expr); } const auto cluster_count = dynamic_buffer_clusters.size() + static_buffer_clusters.size(); @@ -176,7 +173,7 @@ void RuntimeConfigurator::init_buffer_info(const lowered::LinearIRCPtr& linear_i const auto& cluster = p.second; OPENVINO_ASSERT(cluster.size() > 0, "Incorrect size of buffer cluster"); - size_t cluster_offset = ov::as_type_ptr((*cluster.cbegin())->get_node())->get_offset(); + size_t cluster_offset = (*cluster.cbegin())->get_offset(); m_config->buffer_cluster_offsets[cluster_id] = cluster_offset; } @@ -269,7 +266,8 @@ void RuntimeConfigurator::update_buffer_scratchpad_size(const lowered::LinearIRC // No need to calculate allocation size of Buffers which are in Loops with `work_amount = 0` - they won't be executed if (is_not_executed(buffer_expr)) continue; - const auto& allocation_size = lowered::pass::ComputeBufferAllocationSize::get_allocation_size(loop_manager, buffer_expr, m_config->tile_rank); + buffer_expr->init_allocation_size(loop_manager, m_config->tile_rank); + const auto& allocation_size = buffer_expr->get_allocation_size(); OPENVINO_ASSERT(!utils::is_dynamic_value(allocation_size), "Buffer scratchpad size must be defined!"); additional_size = std::max(allocation_size * 
buffer_expr->get_node()->get_element_type().size(), additional_size); } diff --git a/src/common/snippets/src/shape_inference/shape_inference.cpp b/src/common/snippets/src/shape_inference/shape_inference.cpp index ff42dae602a54f..76a4c491c66983 100644 --- a/src/common/snippets/src/shape_inference/shape_inference.cpp +++ b/src/common/snippets/src/shape_inference/shape_inference.cpp @@ -39,7 +39,6 @@ const IShapeInferSnippetsFactory::TRegistry IShapeInferSnippetsFactory::registry SHAPE_INFER_PREDEFINED(op::ConvertSaturation, PassThroughShapeInfer), SHAPE_INFER_PREDEFINED(op::Load, PassThroughShapeInfer), SHAPE_INFER_PREDEFINED(op::Store, PassThroughShapeInfer), - SHAPE_INFER_PREDEFINED(op::IntermediateMemoryBuffer, PassThroughShapeInfer), SHAPE_INFER_PREDEFINED(op::Fill, PassThroughShapeInfer), SHAPE_INFER_PREDEFINED(ov::op::v0::Parameter, PassThroughShapeInfer), SHAPE_INFER_PREDEFINED(ov::op::v1::LogicalNot, PassThroughShapeInfer), @@ -70,7 +69,7 @@ const IShapeInferSnippetsFactory::TRegistry IShapeInferSnippetsFactory::registry SHAPE_INFER_OP_SPECIFIC(op::RankNormalization), SHAPE_INFER_OP_SPECIFIC(op::BroadcastLoad), SHAPE_INFER_OP_SPECIFIC(op::BroadcastMove), - SHAPE_INFER_OP_SPECIFIC(op::NewMemoryBuffer), + SHAPE_INFER_OP_SPECIFIC(op::Buffer), }; #undef SHAPE_INFER_OP_SPECIFIC_EXTERNAL #undef SHAPE_INFER_OP_SPECIFIC diff --git a/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp b/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp index 4dc3f2dae7e867..ac521631917897 100644 --- a/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp +++ b/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp @@ -82,11 +82,9 @@ void BufferAllocationTest::ApplyTransformations(const std::shared_ptr reg_groups, clusters; - for (const auto& expr : m_linear_ir) { - if (const auto buffer = ov::as_type_ptr(expr->get_node())) { - reg_groups.insert(buffer->get_reg_group()); - clusters.insert(buffer->get_cluster_id()); - } + for (const auto& 
buffer_expr : m_linear_ir.get_buffers()) { + reg_groups.insert(buffer_expr->get_reg_group()); + clusters.insert(buffer_expr->get_cluster_id()); } EXPECT_EQ(reg_groups.size(), m_expected_reg_group_count); EXPECT_EQ(clusters.size(), m_expected_cluster_count); @@ -100,9 +98,9 @@ std::shared_ptr EltwiseBufferAllocationTest::GetModel() const { const auto parameter0 = std::make_shared(ov::element::f32, ov::PartialShape({1, 3, 100, 100})); const auto parameter1 = std::make_shared(ov::element::f32, ov::PartialShape({1, 3, 100, 100})); const auto add = std::make_shared(parameter0, parameter1); - const auto buffer0 = std::make_shared(add); + const auto buffer0 = std::make_shared(add); const auto relu = std::make_shared(buffer0); - const auto buffer1 = std::make_shared(relu); + const auto buffer1 = std::make_shared(relu); const auto exp = std::make_shared(buffer1); const auto body = std::make_shared(std::make_shared(exp), ov::ParameterVector{parameter0, parameter1}); diff --git a/src/common/snippets/tests/src/lowering_utils.cpp b/src/common/snippets/tests/src/lowering_utils.cpp index 136dccb5fac667..e9ed04bf8da5a4 100644 --- a/src/common/snippets/tests/src/lowering_utils.cpp +++ b/src/common/snippets/tests/src/lowering_utils.cpp @@ -51,8 +51,7 @@ DummyTargetMachine::DummyTargetMachine(const std::vector& jitters[ov::snippets::op::PerfCountEnd::get_type_info_static()] = dummy_functor; #endif jitters[ov::snippets::op::Brgemm::get_type_info_static()] = dummy_functor; - jitters[ov::snippets::op::IntermediateMemoryBuffer::get_type_info_static()] = dummy_functor; - jitters[ov::snippets::op::NewMemoryBuffer::get_type_info_static()] = dummy_functor; + jitters[ov::snippets::op::Buffer::get_type_info_static()] = dummy_functor; jitters[ov::snippets::op::VectorBuffer::get_type_info_static()] = dummy_functor; jitters[ov::snippets::op::Fill::get_type_info_static()] = dummy_functor; jitters[ov::snippets::op::ReduceMax::get_type_info_static()] = dummy_functor; diff --git 
a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_kernel_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_kernel_emitter.cpp index 9345b79c37e710..806253f0cc2155 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_kernel_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_kernel_emitter.cpp @@ -38,6 +38,7 @@ jit_kernel_emitter::jit_kernel_emitter(jit_generator* h, cpu_isa_t isa, const ov jcp = *reinterpret_cast(kernel->compile_params); const auto& parameters = body->get_parameters(); const auto& results = body->get_results(); + const auto& buffers = body->get_buffers(); num_inputs = parameters.size(); num_outputs = results.size(); for (const auto& param : parameters) @@ -46,19 +47,20 @@ jit_kernel_emitter::jit_kernel_emitter(jit_generator* h, cpu_isa_t isa, const ov mem_access_exprs.push_back(result); std::set unique_buffers; - for (const auto& expr : *body) { - if (const auto buffer = ov::as_type_ptr(expr->get_node())) { - const auto buffer_id = buffer->get_cluster_id(); - if (unique_buffers.count(buffer_id) == 0) { - mem_access_exprs.push_back(expr); - unique_buffers.insert(buffer_id); - } - } else { - if (std::find(parameters.cbegin(), parameters.cend(), expr) == parameters.cend() && - std::find(results.cbegin(), results.cend(), expr) == results.cend()) - general_exprs.emplace_back(expr); + for (const auto& buffer_expr : buffers) { + const auto buffer_reg_group = buffer_expr->get_reg_group(); + if (unique_buffers.count(buffer_reg_group) == 0) { + mem_access_exprs.push_back(buffer_expr); + unique_buffers.insert(buffer_reg_group); } } + + for (const auto& expr : *body) { + if (std::find(parameters.cbegin(), parameters.cend(), expr) == parameters.cend() && + std::find(results.cbegin(), results.cend(), expr) == results.cend() && + std::find(buffers.cbegin(), buffers.cend(), expr) == buffers.cend()) + general_exprs.emplace_back(expr); + } num_unique_buffers = unique_buffers.size(); } diff --git 
a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp index 01a87d849f9731..1da6cd7121487f 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp @@ -159,8 +159,7 @@ intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t ho // data movement jitters[op::v0::Parameter::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_nop_emitter); jitters[op::v0::Result::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_nop_emitter); - jitters[snippets::op::IntermediateMemoryBuffer::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_nop_emitter); - jitters[snippets::op::NewMemoryBuffer::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_nop_emitter); + jitters[snippets::op::Buffer::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_nop_emitter); jitters[snippets::op::VectorBuffer::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_nop_emitter); jitters[snippets::op::RankNormalization::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_nop_emitter); jitters[snippets::op::Reshape::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_nop_emitter); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_emitter.cpp index ff38c5586af106..4c36aa3b21ab35 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_emitter.cpp @@ -41,7 +41,7 @@ jit_brgemm_emitter::jit_brgemm_emitter(jit_generator* h, cpu_isa_t isa, "Jit emitter is called when the shapes are unknown"); auto get_cluster_id = [](const snippets::lowered::ExpressionPort& p) { // Note: NewMemoryBuffer is used as a scratchpad and can't be dynamic, so 
we don't need to account for them here - if (const auto buffer = ov::as_type_ptr(p.get_expr()->get_node())) + if (const auto buffer = ov::as_type_ptr(p.get_expr())) return buffer->get_cluster_id(); else return SIZE_MAX; diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_kernel_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_kernel_emitter.cpp index ff58ef8b0a5bcb..a86bf841c241da 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_kernel_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_kernel_emitter.cpp @@ -23,6 +23,7 @@ jit_kernel_emitter::jit_kernel_emitter(jit_generator* h, cpu_isa_t isa, const ov jcp = *reinterpret_cast(kernel->compile_params); const auto& parameters = body->get_parameters(); const auto& results = body->get_results(); + const auto& buffers = body->get_buffers(); num_inputs = parameters.size(); num_outputs = results.size(); for (const auto& param : parameters) @@ -31,19 +32,20 @@ jit_kernel_emitter::jit_kernel_emitter(jit_generator* h, cpu_isa_t isa, const ov mem_access_exprs.push_back(result); std::set unique_buffers; - for (const auto& expr : *body) { - if (const auto buffer = ov::as_type_ptr(expr->get_node())) { - const auto buffer_reg_group = buffer->get_reg_group(); - if (unique_buffers.count(buffer_reg_group) == 0) { - mem_access_exprs.push_back(expr); - unique_buffers.insert(buffer_reg_group); - } - } else { - if (std::find(parameters.cbegin(), parameters.cend(), expr) == parameters.cend() && - std::find(results.cbegin(), results.cend(), expr) == results.cend()) - general_exprs.emplace_back(expr); + for (const auto& buffer_expr : buffers) { + const auto buffer_reg_group = buffer_expr->get_reg_group(); + if (unique_buffers.count(buffer_reg_group) == 0) { + mem_access_exprs.push_back(buffer_expr); + unique_buffers.insert(buffer_reg_group); } } + + for (const auto& expr : *body) { + if (std::find(parameters.cbegin(), parameters.cend(), expr) == parameters.cend() && + 
std::find(results.cbegin(), results.cend(), expr) == results.cend() && + std::find(buffers.cbegin(), buffers.cend(), expr) == buffers.cend()) + general_exprs.emplace_back(expr); + } num_unique_buffers = unique_buffers.size(); } diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_memory_emitters.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_memory_emitters.cpp index 1d8c26e3d709fa..f2fd978edc6aaf 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_memory_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_memory_emitters.cpp @@ -60,7 +60,7 @@ size_t jit_memory_emitter::aux_gprs_count() const { size_t jit_memory_emitter::get_parent_buffer_cluster_id(const ov::snippets::lowered::ExpressionPtr& expr) { OV_CPU_JIT_EMITTER_ASSERT(expr->get_input_port_connectors().size() == 1, "MemoryAccess must have one parent"); const auto& parent_expr = expr->get_input_port_connector(0)->get_source().get_expr(); - if (const auto buffer = ov::as_type_ptr(parent_expr->get_node())) { + if (const auto buffer = ov::as_type_ptr(parent_expr)) { return buffer->get_cluster_id(); } return SIZE_MAX; @@ -70,7 +70,7 @@ size_t jit_memory_emitter::get_consumer_buffer_cluster_id(const ov::snippets::lo OV_CPU_JIT_EMITTER_ASSERT(expr->get_output_port_connectors().size() == 1, "MemoryAccess must have one consumer"); const auto& consumers = expr->get_output_port_connector(0)->get_consumers(); for (const auto& consumer : consumers) - if (const auto buffer = ov::as_type_ptr(consumer.get_expr()->get_node())) + if (const auto buffer = ov::as_type_ptr(consumer.get_expr())) return buffer->get_cluster_id(); return SIZE_MAX; } diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp index d5a8801ffedeac..5e43da6e2bfb86 100644 --- a/src/plugins/intel_cpu/src/extension.cpp +++ b/src/plugins/intel_cpu/src/extension.cpp @@ -161,12 +161,11 @@ class TypeRelaxedExtension : public ov::OpExtension> { 
OP_EXTENSION(ov::snippets::op::HorizonSum) \ OP_EXTENSION(ov::snippets::op::KernelStatic) \ OP_EXTENSION(ov::snippets::op::KernelDynamic) \ - OP_EXTENSION(ov::snippets::op::IntermediateMemoryBuffer) \ OP_EXTENSION(ov::snippets::op::Load) \ OP_EXTENSION(ov::snippets::op::LoadReshape) \ OP_EXTENSION(ov::snippets::op::LoopBegin) \ OP_EXTENSION(ov::snippets::op::LoopEnd) \ - OP_EXTENSION(ov::snippets::op::NewMemoryBuffer) \ + OP_EXTENSION(ov::snippets::op::Buffer) \ OP_EXTENSION(ov::snippets::op::Nop) \ OP_EXTENSION(ov::snippets::op::PowerStatic) \ OP_EXTENSION(ov::snippets::op::Scalar) \ diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index e166fc8bf453e7..8d04c41676b193 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -21,6 +21,7 @@ #include "snippets/lowered/pass/optimize_domain.hpp" #include "snippets/lowered/pass/insert_loops.hpp" #include "snippets/lowered/pass/mark_loops.hpp" +#include "snippets/lowered/pass/insert_buffers.hpp" #include "transformations/defs.hpp" #include "transformations/cpu_opset/common/pass/convert_to_swish_cpu.hpp" #include "transformations/snippets/common/pass/mul_add_to_fma.hpp" @@ -32,7 +33,7 @@ #else #include "emitters/snippets/x64/cpu_generator.hpp" #include "transformations/snippets/x64/pass/lowered/brgemm_cpu_blocking.hpp" -#include "transformations/snippets/x64/pass/lowered/set_brgemm_copy_b_buffers_shape.hpp" +#include "transformations/snippets/x64/pass/lowered/insert_brgemm_copy_b_buffers.hpp" #include "transformations/snippets/x64/pass/remove_converts.hpp" #include "transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.hpp" #include "transformations/snippets/x64/pass/enforce_precision.hpp" @@ -682,8 +683,8 @@ Subgraph::ControlFlowPasses Subgraph::getControlFlowPasses() const { ov::intel_cpu::pass::FuseLoadStoreConvert); #if defined(OPENVINO_ARCH_X86_64) - SNIPPETS_REGISTER_PASS_RELATIVE(Place::After, 
ov::intel_cpu::pass::FuseLoadStoreConvert, - ov::intel_cpu::pass::SetBrgemmCopyBBuffersShape); + SNIPPETS_REGISTER_PASS_RELATIVE(Place::Before, ov::snippets::lowered::pass::InsertBuffers, + ov::intel_cpu::pass::InsertBrgemmCopyBBuffers); #endif #ifdef SNIPPETS_LIBXSMM_TPP diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp index cc30edef38086f..dfe4441de90699 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_cpu.cpp @@ -137,7 +137,7 @@ std::shared_ptr BrgemmCPU::get_brgemm_copy() const { if (const auto brgemm_copy_b = ov::as_type_ptr(b_input_node)) { return brgemm_copy_b; } - if (ov::is_type(b_input_node)) { + if (ov::is_type(b_input_node)) { if (const auto brgemm_copy_b = ov::as_type_ptr(b_input_node->get_input_node_shared_ptr(0))) { return brgemm_copy_b; } diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.cpp index e5fac40ac09604..af70218ce0635f 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.cpp @@ -77,46 +77,6 @@ size_t get_elems_in_vec(const ov::element::Type& precision) { } namespace repacking { -size_t get_repacking_buffer_size(const ov::snippets::lowered::ExpressionPtr& copy_b_expr) { - OPENVINO_ASSERT(ov::is_type(copy_b_expr->get_node())); - const auto& in_desc = copy_b_expr->get_input_port_descriptor(0); - const auto& in_layout = in_desc->get_layout(); - const auto& in_subtensor = ov::snippets::utils::get_projected_subtensor(copy_b_expr->get_input_port(0)); - - const size_t n_blk = *in_subtensor.rbegin(); - const size_t k_blk = *++in_subtensor.rbegin(); - OPENVINO_ASSERT(!is_dynamic_value(n_blk) && 
!is_dynamic_value(k_blk), "get_repacking_buffer_size must be called with static subtensor values"); - - const auto& precision = copy_b_expr->get_node()->get_input_element_type(0); - // Repacking buffer shape is set in accordance to OneDNN requirements - const size_t N_dim = std::max(n_blk, compute_inner_n_block(precision)); - if (!in_layout.empty() && in_layout.back() != in_layout.size() - 1) { - // In case of transpose, K dimension must be rounded-up to number of elems in vector register - // For the details, please see 'transpose16x8' and 'fixup16x16' implementations and usage in onednn/src/cpu/x64/matmul/brgemm_matmul_copy_utils.cpp - const auto elems_in_vec = brgemm_utils::get_elems_in_vec(precision); - return N_dim * rnd_up(k_blk, elems_in_vec); - } else { - // Low precision repacking writes the result by m_brgemmVNNIFactor * m_inner_n_block blocks - // despite the actual size of the input data. Because of that we have to round-up the allocation shape to always have enough memory allocated. 
- // For the details, please see 'copy_4x64' and 'copy_2x32' implementations and usage in onednn/src/cpu/x64/matmul/brgemm_matmul_copy_utils.cpp - const auto brgemmVNNIFactor = brgemm_utils::compute_vnni_factor(precision); - OPENVINO_ASSERT(brgemmVNNIFactor > 0, "brgemmVNNIFactor value must be positive."); - return N_dim * rnd_up(k_blk, brgemmVNNIFactor); - } -} - -size_t get_compensations_buffer_size(const ov::snippets::lowered::ExpressionPtr& copy_b_expr) { - OPENVINO_ASSERT(ov::is_type(copy_b_expr->get_node())); - const auto& in_subtensor = ov::snippets::utils::get_projected_subtensor(copy_b_expr->get_input_port(0)); - const size_t n_blk = *in_subtensor.rbegin(); - OPENVINO_ASSERT(!is_dynamic_value(n_blk), "get_compensations_buffer_size must be called with static subtensor values"); - const auto& precision = copy_b_expr->get_node()->get_input_element_type(0); - // Compensations are computed during repacking, so we need to round-up allocation shape according to m_inner_n_block - // because of OneDNN implementation nuances (as in get_repacking_buffer_size). 
- // However, the compensations are computed by N dimension, so K dimension doesn't affect the compensations buffer - return std::max(n_blk, compute_inner_n_block(precision)); -} - size_t compute_out_leading_dim(const size_t n_block, const ov::element::Type& precision) { return std::max(n_block, compute_inner_n_block(precision)); } diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.hpp index 32d2264822ad57..d0360e45a62e18 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.hpp @@ -42,18 +42,6 @@ size_t compute_vnni_factor(const ov::element::Type& precision); size_t get_elems_in_vec(const ov::element::Type& precision); namespace repacking { -/** - * @brief Computes buffer size that OneDNN impl needs for repacked tensor - * @param copy_b_expr Repacking expression whose information (tensor precision, layout, subtensors) is used for - * buffer size computations - */ -size_t get_repacking_buffer_size(const ov::snippets::lowered::ExpressionPtr& copy_b_expr); -/** - * @brief Computes buffer size that OneDNN impl needs for compensations - * @param copy_b_expr Repacking expression whose information (tensor precision, subtensors) is used for - * buffer size computations - */ -size_t get_compensations_buffer_size(const ov::snippets::lowered::ExpressionPtr& copy_b_expr); /** * @brief Computes leading dimension (LDB) which must be used in brgemm and brgemm_copy_b emitters * @param n_block N block size shared between BrgemmCPU and BrgemmCopyB node diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp index 3aff94fb7f20f9..6dda47e47326aa 100644 --- 
a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp @@ -88,7 +88,7 @@ pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { set_full_port_desc(output); if (with_amx(brgemm_type)) { - const auto scratch = std::make_shared(ov::Shape{BrgemmCPU::SCRATCH_BYTE_SIZE}); + const auto scratch = std::make_shared(ov::Shape{BrgemmCPU::SCRATCH_BYTE_SIZE}); brgemm_cpu = std::make_shared(brgemm->input_value(0), brgemm_repacking->output(0), scratch, brgemm_type, offset_a, offset_b, 0, offset_c, layout_a, std::vector{}, layout_c); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_cpu_blocking.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_cpu_blocking.cpp index a5382f5afed53f..73da2a786fbee8 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_cpu_blocking.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_cpu_blocking.cpp @@ -34,8 +34,8 @@ std::shared_ptr BrgemmCPUBlocking::DummyPass: LinearIR::constExprIt BrgemmCPUBlocking::move_new_memory_buffer(LinearIR& linear_ir, const LinearIR::constExprIt& brgemm_it) { const auto& brgemm_expr = brgemm_it->get(); const auto wsp_expr = brgemm_expr->get_input_port_connector(2)->get_source().get_expr(); - const auto wsp_buffer = ov::as_type_ptr(wsp_expr->get_node()); - OPENVINO_ASSERT(wsp_buffer, "Incorrect Scratchpad buffer for Brgemm AMX"); + const auto wsp_buffer = ov::as_type_ptr(wsp_expr); + OPENVINO_ASSERT(wsp_buffer && wsp_buffer->get_input_count() == 0, "Incorrect Scratchpad buffer for Brgemm AMX"); // If scratchpad with temp memory is not explicitly before Brgemm, need to move to there. 
if (wsp_expr != *std::prev(brgemm_it)) { const auto wsp_it = linear_ir.find(wsp_expr); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/insert_brgemm_copy_b_buffers.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/insert_brgemm_copy_b_buffers.cpp new file mode 100644 index 00000000000000..bb209d6c282918 --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/insert_brgemm_copy_b_buffers.cpp @@ -0,0 +1,140 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "insert_brgemm_copy_b_buffers.hpp" + +#include "snippets/lowered/loop_manager.hpp" +#include "snippets/snippets_isa.hpp" +#include "snippets/utils/utils.hpp" +#include "snippets/itt.hpp" + +#include "transformations/snippets/x64/op/brgemm_copy_b.hpp" +#include "utils/general_utils.h" + + +using namespace ov::intel_cpu::brgemm_utils::repacking; +using namespace ov::snippets::lowered; + +namespace ov { +namespace intel_cpu { +namespace pass { + +bool InsertBrgemmCopyBBuffers::run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) { + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::InsertBrgemmCopyBBuffers") + + const auto& factory = linear_ir.get_expr_factory(); + + auto insert_buffer = [&](const ExpressionPtr& copy_b_expr, size_t out_port, LinearIR::constExprIt insertion_pos) { + const auto& copy_b = ov::as_type_ptr(copy_b_expr->get_node()); + const auto& copy_b_out = copy_b_expr->get_output_port_connector(out_port); + const auto copy_b_consumers = copy_b_out->get_consumers(); + OPENVINO_ASSERT(copy_b_consumers.size() == 1, "BufferCopyB must have only one consumer on each out port - Brgemm"); + const auto& buffer_op = std::make_shared(copy_b->output(out_port)); + BufferExpressionPtr buffer_expr = nullptr; + if (out_port == 0) { + buffer_expr = factory->build(buffer_op, {copy_b_out}); + } else if (out_port == 1 && 
with_compensations(copy_b->get_type())) { + buffer_expr = factory->build(buffer_op, {copy_b_out}); + } else { + OPENVINO_THROW("BrgemmCopyB has incorrect output ports"); + } + return linear_ir.insert_expr(buffer_expr, LoopManager::get_common_outer_loops(copy_b_expr, copy_b_consumers.begin()->get_expr()), + true, insertion_pos, {copy_b_consumers}); + }; + + bool modified = false; + for (auto expr_it = begin; expr_it != end; ++expr_it) { + const auto expr = *expr_it; + if (auto copy_b = ov::as_type_ptr(expr->get_node())) { + for (size_t i = 0; i < expr->get_output_count(); ++i) { + expr_it = insert_buffer(expr, i, std::next(expr_it)); + } + modified = true; + } + } + return modified; +} + +InsertBrgemmCopyBBuffers::RepackedWeightsBufferExpression::RepackedWeightsBufferExpression(const RepackedWeightsBufferExpression& other) + : BufferExpression(other) {} + +InsertBrgemmCopyBBuffers::RepackedWeightsBufferExpression::RepackedWeightsBufferExpression(const std::shared_ptr& n, + const std::shared_ptr& factory) : BufferExpression(n, factory) {} + +snippets::lowered::ExpressionPtr InsertBrgemmCopyBBuffers::RepackedWeightsBufferExpression::clone() const { + return std::shared_ptr(new RepackedWeightsBufferExpression(*this)); +} + +void InsertBrgemmCopyBBuffers::RepackedWeightsBufferExpression::validate() const { + BufferExpression::validate(); + OPENVINO_ASSERT(get_input_count() == 1, "RepackedWeightsBufferExpression must have only one input"); + const auto& parent_out = get_input_port_connector(0)->get_source(); + OPENVINO_ASSERT(ov::is_type(parent_out.get_expr()->get_node()) && parent_out.get_index() == 0, + "RepackedWeightsBufferExpression expects BrgemmCopyB as parent expression"); +} + +void InsertBrgemmCopyBBuffers::RepackedWeightsBufferExpression::init_allocation_size(const std::shared_ptr& loop_manager, + size_t allocation_rank) { + const auto& parent_expr = get_input_port_connector(0)->get_source().get_expr(); + const auto& in_layout = 
parent_expr->get_input_port_descriptor(0)->get_layout(); + const auto& in_subtensor = ov::snippets::utils::get_projected_subtensor(parent_expr->get_input_port(0)); + + const size_t n_blk = *in_subtensor.rbegin(); + const size_t k_blk = *++in_subtensor.rbegin(); + OPENVINO_ASSERT(!ov::snippets::utils::is_dynamic_value(n_blk) && !ov::snippets::utils::is_dynamic_value(k_blk), + "RepackedWeightsBufferExpression supports only static subtensor values"); + + const auto& precision = get_node()->get_input_element_type(0); + // Repacking buffer shape is set in accordance to OneDNN requirements + const size_t N_dim = std::max(n_blk, compute_inner_n_block(precision)); + if (!in_layout.empty() && in_layout.back() != in_layout.size() - 1) { + // In case of transpose, K dimension must be rounded-up to number of elems in vector register + // For the details, please see 'transpose16x8' and 'fixup16x16' implementations and usage in onednn/src/cpu/x64/matmul/brgemm_matmul_copy_utils.cpp + const auto elems_in_vec = brgemm_utils::get_elems_in_vec(precision); + m_allocation_size = N_dim * rnd_up(k_blk, elems_in_vec); + } else { + // Low precision repacking writes the result by m_brgemmVNNIFactor * m_inner_n_block blocks + // despite the actual size of the input data. Because of that we have to round-up the allocation shape to always have enough memory allocated. 
+ // For the details, please see 'copy_4x64' and 'copy_2x32' implementations and usage in onednn/src/cpu/x64/matmul/brgemm_matmul_copy_utils.cpp + const auto brgemmVNNIFactor = brgemm_utils::compute_vnni_factor(precision); + OPENVINO_ASSERT(brgemmVNNIFactor > 0, "brgemmVNNIFactor value must be positive."); + m_allocation_size = N_dim * rnd_up(k_blk, brgemmVNNIFactor); + } +} + +InsertBrgemmCopyBBuffers::CompensationsBufferExpression::CompensationsBufferExpression(const CompensationsBufferExpression& other) + : BufferExpression(other) {} + +InsertBrgemmCopyBBuffers::CompensationsBufferExpression::CompensationsBufferExpression(const std::shared_ptr& n, + const std::shared_ptr& factory) : BufferExpression(n, factory) {} + +snippets::lowered::ExpressionPtr InsertBrgemmCopyBBuffers::CompensationsBufferExpression::clone() const { + return std::shared_ptr(new CompensationsBufferExpression(*this)); +} + +void InsertBrgemmCopyBBuffers::CompensationsBufferExpression::validate() const { + BufferExpression::validate(); + OPENVINO_ASSERT(get_input_count() == 1, "CompensationsBufferExpression must have only one input"); + const auto& parent_out = get_input_port_connector(0)->get_source(); + OPENVINO_ASSERT(ov::is_type(parent_out.get_expr()->get_node()) && parent_out.get_index() == 1, + "CompensationsBufferExpression expects BrgemmCopyB as parent expression"); +} + +void InsertBrgemmCopyBBuffers::CompensationsBufferExpression::init_allocation_size(const std::shared_ptr& loop_manager, + size_t allocation_rank) { + const auto& parent_expr = get_input_port_connector(0)->get_source().get_expr(); + const auto& in_subtensor = ov::snippets::utils::get_projected_subtensor(parent_expr->get_input_port(0)); + const size_t n_blk = *in_subtensor.rbegin(); + OPENVINO_ASSERT(!ov::snippets::utils::is_dynamic_value(n_blk), "CompensationsBufferExpression supports only static subtensor values"); + const auto& precision = parent_expr->get_node()->get_input_element_type(0); + // Compensations are 
computed during repacking, so we need to round-up allocation shape according to m_inner_n_block + // because of OneDNN implementation nuances (as in get_repacking_buffer_size). + // However, the compensations are computed by N dimension, so K dimension doesn't affect the compensations buffer + m_allocation_size = std::max(n_blk, compute_inner_n_block(precision)); +} + +} // namespace pass +} // namespace intel_cpu +} // namespace ov + diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/insert_brgemm_copy_b_buffers.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/insert_brgemm_copy_b_buffers.hpp new file mode 100644 index 00000000000000..c13a239a39851d --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/insert_brgemm_copy_b_buffers.hpp @@ -0,0 +1,65 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "snippets/lowered/pass/pass.hpp" + +#include "snippets/lowered/expressions/buffer_expression.hpp" + +namespace ov { +namespace intel_cpu { +namespace pass { + +/** + * @interface InsertBrgemmCopyBBuffers + * @brief Insert Buffers after BrgemmCopyB with algorithm of allocation size calculation which + * distinguishes with common algorithm + * @ingroup snippets + */ +class InsertBrgemmCopyBBuffers: public snippets::lowered::pass::RangedPass { +public: + InsertBrgemmCopyBBuffers() = default; + OPENVINO_RTTI("InsertBrgemmCopyBBuffers", "Pass"); + bool run(snippets::lowered::LinearIR& linear_ir, snippets::lowered::LinearIR::constExprIt begin, snippets::lowered::LinearIR::constExprIt end) override; + +private: + class RepackedWeightsBufferExpression : public snippets::lowered::BufferExpression { + friend class snippets::lowered::ExpressionFactory; + public: + OPENVINO_RTTI("RepackedWeightsBufferExpression", "0", BufferExpression) + RepackedWeightsBufferExpression() = default; + + void validate() const override; + + void 
init_allocation_size(const std::shared_ptr& loop_manager, size_t allocation_rank) override; + + private: + RepackedWeightsBufferExpression(const RepackedWeightsBufferExpression& other); + RepackedWeightsBufferExpression(const std::shared_ptr& n, const std::shared_ptr& factory); + + snippets::lowered::ExpressionPtr clone() const override; + }; + + class CompensationsBufferExpression : public snippets::lowered::BufferExpression { + friend class snippets::lowered::ExpressionFactory; + public: + OPENVINO_RTTI("CompensationsBufferExpression", "0", BufferExpression) + CompensationsBufferExpression() = default; + + void validate() const override; + + void init_allocation_size(const std::shared_ptr& loop_manager, size_t allocation_rank) override; + + private: + CompensationsBufferExpression(const CompensationsBufferExpression& other); + CompensationsBufferExpression(const std::shared_ptr& n, const std::shared_ptr& factory); + + snippets::lowered::ExpressionPtr clone() const override; + }; +}; + +} // namespace pass +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/set_brgemm_copy_b_buffers_shape.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/set_brgemm_copy_b_buffers_shape.cpp deleted file mode 100644 index 332c0cccaf4acc..00000000000000 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/set_brgemm_copy_b_buffers_shape.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "snippets/itt.hpp" - -#include "set_brgemm_copy_b_buffers_shape.hpp" -#include "snippets/snippets_isa.hpp" -#include "snippets/utils/utils.hpp" - -#include "transformations/snippets/x64/op/brgemm_copy_b.hpp" -#include "transformations/snippets/x64/op/brgemm_utils.hpp" - -using namespace ov::intel_cpu::brgemm_utils::repacking; - -bool 
ov::intel_cpu::pass::SetBrgemmCopyBBuffersShape::run(snippets::lowered::LinearIR& linear_ir, - snippets::lowered::LinearIR::constExprIt begin, - snippets::lowered::LinearIR::constExprIt end) { - OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::SetBrgemmCopyBBuffersShape") - - auto get_buffer_from_output = [](const snippets::lowered::ExpressionPtr& expr, const size_t out_idx) { - const auto& consumers = expr->get_output_port_connector(out_idx)->get_consumers(); - OPENVINO_ASSERT(consumers.size() == 1, "BrgemmCopyB must have only 1 consumer"); - const auto buffer = ov::as_type_ptr(consumers.begin()->get_expr()->get_node()); - OPENVINO_ASSERT(buffer, "BrgemmCopyB consumer must be Buffer"); - return buffer; - }; - - bool modified = false; - for (auto expr_it = begin; expr_it != end; ++expr_it) { - const auto& expr = *expr_it; - if (auto copy_b = ov::as_type_ptr(expr->get_node())) { - const auto buffer = get_buffer_from_output(expr, 0); - buffer->set_allocation_size(get_repacking_buffer_size(expr)); - if (with_compensations(copy_b->get_type())) { - const auto compensations_buffer = get_buffer_from_output(expr, 1); - compensations_buffer->set_allocation_size(get_compensations_buffer_size(expr)); - } - modified = true; - } - } - return modified; -} diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/set_brgemm_copy_b_buffers_shape.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/set_brgemm_copy_b_buffers_shape.hpp deleted file mode 100644 index 1b348ecbf2740c..00000000000000 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/set_brgemm_copy_b_buffers_shape.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "snippets/lowered/pass/pass.hpp" - -namespace ov { -namespace intel_cpu { -namespace pass { - -/** - * @interface SetBrgemmCopyBBuffersShape - * @brief Sets the 
allocation shape for the Buffers after BrgemmCopyB node using BrgemmCopyB parameters - * This pass may be deprecated when a more generic memory management approach is introduced. - * Ticket: 113744 - * @ingroup snippets - */ -class SetBrgemmCopyBBuffersShape: public snippets::lowered::pass::RangedPass { -public: - SetBrgemmCopyBBuffersShape() = default; - OPENVINO_RTTI("SetBrgemmCopyBBuffersShape", "Pass"); - bool run(snippets::lowered::LinearIR& linear_ir, - snippets::lowered::LinearIR::constExprIt begin, - snippets::lowered::LinearIR::constExprIt end) override; -}; - -} // namespace pass -} // namespace intel_cpu -} // namespace ov diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp index 4be2638e28b893..89f2e06c14a9fa 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp @@ -310,7 +310,7 @@ TEST_F(BrgemmCPUBlockingTest, AMX) { { auto data_a = linear_ir->push_node(precision, input_shape_a); auto data_b = linear_ir->push_node(precision, input_shape_b); - auto scratch = linear_ir->push_node(ov::Shape{BrgemmCPU::SCRATCH_BYTE_SIZE}); + auto scratch = linear_ir->push_node(ov::Shape{BrgemmCPU::SCRATCH_BYTE_SIZE}); auto copy_b = linear_ir->push_node(data_b.second, precision, BRGEMM_TYPE::REPACKING_ONLY); init_expr_descriptors(*copy_b.first); auto brgemm = linear_ir->push_node(data_a.second, copy_b.second, scratch.second, BRGEMM_TYPE::WITH_AMX); @@ -324,7 +324,7 @@ TEST_F(BrgemmCPUBlockingTest, AMX) { const auto copy_b_expr = *copy_b.first; init_expr_descriptors(copy_b_expr, {{full_dim, full_dim}, {full_dim, full_dim}}); - auto scratch = linear_ir_ref->push_node(ov::Shape{BrgemmCPU::SCRATCH_BYTE_SIZE}); + auto scratch = linear_ir_ref->push_node(ov::Shape{BrgemmCPU::SCRATCH_BYTE_SIZE}); 
scratch.first->get()->set_loop_ids({0}); auto brgemm = linear_ir_ref->push_node(data_a.second, copy_b.second, scratch.second, BRGEMM_TYPE::WITH_AMX); diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/buffer_allocation.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/buffer_allocation.cpp index 5434ff228aa833..6dad1d4772f531 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/buffer_allocation.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/buffer_allocation.cpp @@ -17,7 +17,7 @@ #include "transformations/snippets/x64/shape_inference.hpp" #include "transformations/snippets/x64/pass/lowered/brgemm_cpu_blocking.hpp" -#include "transformations/snippets/x64/pass/lowered/set_brgemm_copy_b_buffers_shape.hpp" +#include "transformations/snippets/x64/pass/lowered/insert_brgemm_copy_b_buffers.hpp" #include "transformations/snippets/x64/op/brgemm_cpu.hpp" #include "transformations/snippets/x64/op/brgemm_copy_b.hpp" @@ -85,22 +85,20 @@ class BufferAllocationCPUTest : public testing::TestWithParam(m_vector_size); pipeline.register_pass(); pipeline.register_pass(); + pipeline.register_pass(); pipeline.register_pass(); pipeline.register_pass(m_vector_size); pipeline.register_pass(); pipeline.register_pass(); - pipeline.register_pass(); pipeline.register_pass(m_is_buffer_optimized); pipeline.run(m_linear_ir); } void Validate() { std::set reg_groups, clusters; - for (const auto& expr : m_linear_ir) { - if (const auto buffer = ov::as_type_ptr(expr->get_node())) { - reg_groups.insert(buffer->get_reg_group()); - clusters.insert(buffer->get_cluster_id()); - } + for (const auto& buffer : m_linear_ir.get_buffers()) { + reg_groups.insert(buffer->get_reg_group()); + clusters.insert(buffer->get_cluster_id()); } EXPECT_EQ(reg_groups.size(), m_expected_reg_group_count); EXPECT_EQ(clusters.size(), m_expected_cluster_count); @@ -211,7 +209,7 @@ class 
MHABF16AMXBufferAllocationTest : public BufferAllocationCPUTest { const auto convert1 = std::make_shared(relu0, ov::element::bf16); const auto brgemm_copyb0 = std::make_shared(convert1, ov::element::bf16); - const auto scratch0 = std::make_shared(ov::Shape{ov::intel_cpu::BrgemmCPU::SCRATCH_BYTE_SIZE}); + const auto scratch0 = std::make_shared(ov::Shape{ov::intel_cpu::BrgemmCPU::SCRATCH_BYTE_SIZE}); const auto brgemm_cpu0 = std::make_shared( parameter0, brgemm_copyb0->output(0), scratch0, BRGEMM_TYPE::WITH_AMX); @@ -231,7 +229,7 @@ class MHABF16AMXBufferAllocationTest : public BufferAllocationCPUTest { const auto convert2 = std::make_shared(multiply, ov::element::bf16); const auto brgemm_copyb1 = std::make_shared(parameter2, ov::element::bf16); - const auto scratch1 = std::make_shared(ov::Shape{ov::intel_cpu::BrgemmCPU::SCRATCH_BYTE_SIZE}); + const auto scratch1 = std::make_shared(ov::Shape{ov::intel_cpu::BrgemmCPU::SCRATCH_BYTE_SIZE}); const auto brgemm_cpu1 = std::make_shared( convert2, brgemm_copyb1->output(0), scratch1, BRGEMM_TYPE::WITH_AMX);