diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp
index 72a62781580cda..e7e5121b1240e7 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp
@@ -16,8 +16,6 @@ namespace ov {
 namespace npuw {
 namespace online {
 
-class Group;  // forward declaration
-
 namespace detail {
 // At partitioning level we exclude some "non-Ops" to not interfere with the passes.
 // We include some of them back to properly link everything at plugin level
@@ -33,6 +31,8 @@ class Snapshot : public std::enable_shared_from_this<Snapshot> {
           m_node_to_prod_cons(std::make_shared<detail::OVNodeMap>()),
           m_node_to_gr(std::make_shared<detail::OVNodeToGroupMap>()) {}
 
+    friend class Group;  // also serves as a forward declaration of Group
+
     // Simple passes
     void singleGroup();
 
@@ -49,27 +49,27 @@ class Snapshot : public std::enable_shared_from_this<Snapshot> {
     void repeatedBlocks();
     void earlyAvoids();
     void earlyRegroup();
-    void markInternalCompute();
-    void resetExcludedRep();
 
     // Utility
     std::shared_ptr<own::ade::Graph> getGraph() const;
-    size_t graphSize() const;
-    const detail::OVNodeSet& getNodeProducers(const detail::OVNodePtr& node) const;
-    const detail::OVNodeSet& getNodeConsumers(const detail::OVNodePtr& node) const;
     const detail::OVPortsMap& getPortsMap() const;
     const detail::OVNodeToGroupMapPtr& getNodeToGroupMap() const;
     const std::map<std::string, std::vector<std::set<std::string>>>& getMatches() const;
-    detail::GPtrSet getRepGroups(const std::shared_ptr<Group>& group) const;
     void repeat(detail::Pass&& pass);
     void setCtx(const PassContext& ctx);
+    size_t graphSize() const;
 
 private:
+    detail::GPtrSet getRepGroups(const std::shared_ptr<Group>& group) const;
+    const detail::OVNodeSet& getNodeProducers(const detail::OVNodePtr& node) const;
+    const detail::OVNodeSet& getNodeConsumers(const detail::OVNodePtr& node) const;
     void identifyUniques();
     void mergeUniques();
     void mergeTriangles();
     void cleanUpUniques();
     void afterUniques();
+    void markInternalCompute();
+    void resetExcludedRep();
     bool cleanUpUniquesImpl(const detail::GPtrSet& gset);
     std::shared_ptr<Repeated> tryGrowRepeatingGroups(const detail::GPtrSet& repeating_groups);
     std::shared_ptr<Repeated> tryMergeTriangles(const detail::GPtrSet& repeating_groups);
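Note on the header change above: the merge/query internals move to the private section, and Group, their only intended caller, keeps reaching them through friendship; since a friend declaration also introduces the class name, the standalone forward declaration could be dropped. A minimal compilable sketch of the pattern (illustrative names, not the actual npuw types):

    #include <memory>
    #include <set>

    class Snapshot {
    public:
        void repeat() {}  // the public pass-pipeline surface stays public
    private:
        friend class Group;  // also declares the name Group
        std::set<int> getNodeProducers() const { return {1, 2, 3}; }  // internal query
    };

    class Group {
    public:
        static std::set<int> probe(const Snapshot& s) {
            return s.getNodeProducers();  // OK: Group is a friend of Snapshot
        }
    };

    int main() {
        Snapshot s;
        return static_cast<int>(Group::probe(s).size());  // 3
    }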
diff --git a/src/plugins/intel_npu/tests/CMakeLists.txt b/src/plugins/intel_npu/tests/CMakeLists.txt
index 4c41f008eb7f81..0f5bd7a6b093b2 100644
--- a/src/plugins/intel_npu/tests/CMakeLists.txt
+++ b/src/plugins/intel_npu/tests/CMakeLists.txt
@@ -8,3 +8,4 @@ if (MSVC)
     ov_add_compiler_flags(/wd5105)
 endif()
 add_subdirectory(functional)
+add_subdirectory(unit)
diff --git a/src/plugins/intel_npu/tests/unit/CMakeLists.txt b/src/plugins/intel_npu/tests/unit/CMakeLists.txt
new file mode 100644
index 00000000000000..861a0ff6a47076
--- /dev/null
+++ b/src/plugins/intel_npu/tests/unit/CMakeLists.txt
@@ -0,0 +1,46 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set(TARGET_NAME "ov_npu_unit_tests")
+
+set(MANDATORY_UNIT_TESTS_LIBS
+        "openvino::commonTestUtils"
+        "openvino::gmock"
+        "openvino::gtest"
+        "openvino::gtest_main"
+        "openvino::runtime"
+        "openvino::npu_al"
+        "openvino::npu_logger_utils"
+)
+
+ov_add_test_target(
+        NAME ${TARGET_NAME}
+        ROOT ${CMAKE_CURRENT_SOURCE_DIR}
+        ADDITIONAL_SOURCE_DIRS
+            ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/npuw/
+        DEPENDENCIES
+            openvino::runtime
+        INCLUDES
+            ${CMAKE_CURRENT_SOURCE_DIR}
+            ${CMAKE_CURRENT_SOURCE_DIR}/npuw
+            ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/npuw
+            ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/utils/include
+            ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/include
+            ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/al/include
+        LINK_LIBRARIES
+            ${MANDATORY_UNIT_TESTS_LIBS}
+        LABELS
+            NPUW
+)
+
+if(ENABLE_AVX2)
+    ov_avx2_optimization_flags(avx2_flags)
+    target_compile_options(${TARGET_NAME} PRIVATE "${avx2_flags}")
+endif()
+
+install(TARGETS ${TARGET_NAME}
+        RUNTIME DESTINATION tests
+        COMPONENT tests
+        EXCLUDE_FROM_ALL
+)
diff --git a/src/plugins/intel_npu/tests/unit/npuw/online_partitioning.cpp b/src/plugins/intel_npu/tests/unit/npuw/online_partitioning.cpp
new file mode 100644
index 00000000000000..af1fc5de8e92c7
--- /dev/null
+++ b/src/plugins/intel_npu/tests/unit/npuw/online_partitioning.cpp
@@ -0,0 +1,692 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+
+#include "partitioning/online/compiler.hpp"
+#include "partitioning/online/snapshot.hpp"
+#include "partitioning/online/group.hpp"
+
+#include "intel_npu/al/config/config.hpp"
+#include "intel_npu/al/config/npuw.hpp"
+
+#include "openvino/openvino.hpp"
+#include "openvino/op/ops.hpp"
+#include "openvino/op/util/op_types.hpp"
+
+bool isEqualEns(ov::npuw::Ensemble& ens1, ov::npuw::Ensemble& ens2);
+bool isEqualEns(ov::npuw::Ensemble& ens1, ov::npuw::Ensemble& ens2) {
+    if (ens1.groups.size() != ens2.groups.size()) {
+        return false;
+    }
+
+    for (auto& g : ens1.groups) {
+        std::sort(g.input_layers.begin(), g.input_layers.end());
+        std::sort(g.output_layers.begin(), g.output_layers.end());
+        std::sort(g.all_layers.begin(), g.all_layers.end());
+    }
+
+    for (auto& g : ens2.groups) {
+        std::sort(g.input_layers.begin(), g.input_layers.end());
+        std::sort(g.output_layers.begin(), g.output_layers.end());
+        std::sort(g.all_layers.begin(), g.all_layers.end());
+    }
+
+    std::sort(ens1.groups.begin(), ens1.groups.end(), [](const ov::npuw::Group& g1,
+                                                         const ov::npuw::Group& g2) {
+        return g1.all_layers.front() < g2.all_layers.front();
+    });
+
+    std::sort(ens2.groups.begin(), ens2.groups.end(), [](const ov::npuw::Group& g1,
+                                                         const ov::npuw::Group& g2) {
+        return g1.all_layers.front() < g2.all_layers.front();
+    });
+
+    for (size_t i = 0; i < ens1.groups.size(); ++i) {
+        const auto& g1 = ens1.groups.at(i);
+        const auto& g2 = ens2.groups.at(i);
+
+        if (g1.avoid_list != g2.avoid_list ||
+            g1.input_layers != g2.input_layers ||
+            g1.output_layers != g2.output_layers ||
+            g1.all_layers != g2.all_layers) {
+            return false;
+        }
+
+        // Can't compare repeated_ids directly since they are random, but they don't affect the structure
+        if ((g1.repeated_id.empty() && !g2.repeated_id.empty()) ||
+            (!g1.repeated_id.empty() && g2.repeated_id.empty())) {
+            return false;
+        }
+    }
+
+    if (ens1.repeated.size() != ens2.repeated.size()) {
+        return false;
+    }
+
+    auto get_sorted_rep = [](const std::map<std::string, ov::npuw::RepeatedBlock>& rep) {
+        std::vector<std::vector<std::set<std::string>>> sorted_rep;
+
+        std::transform(rep.begin(), rep.end(), std::back_inserter(sorted_rep), [](const auto& v) {
+            return v.second.matches;
+        });
+
+        for (auto& g : sorted_rep) {
+            std::sort(g.begin(), g.end(),
+                      [](const auto& a, const auto& b) { return *a.begin() < *b.begin(); });
+        }
+
+        std::sort(sorted_rep.begin(), sorted_rep.end(),
+                  [](const auto& a, const auto& b) { return *a.front().begin() < *b.front().begin(); });
+
+        return sorted_rep;
+    };
+
+    if (get_sorted_rep(ens1.repeated) != get_sorted_rep(ens2.repeated)) {
+        return false;
+    }
+
+    return true;
+}
+
+class ModelGenerator {
+public:
+    ModelGenerator() = default;
+
+    std::shared_ptr<ov::Model> 
get_model_without_repeated_blocks() { + std::shared_ptr input = std::make_shared(ov::element::i32, ov::Shape{1, 1, 40}); + m_nodes.push_back(input); + set_name(input); + + std::shared_ptr res = get_block(input); + + auto result = std::make_shared(res); + m_nodes.push_back(result); + set_name(result); + + ov::ParameterVector params = {input}; + ov::ResultVector results = {result}; + + return std::make_shared(results, params); + } + + std::shared_ptr get_model_with_repeated_blocks() { + // Generate head + std::shared_ptr input = std::make_shared(ov::element::i32, ov::Shape{1, 1, 40}); + m_nodes.push_back(input); + set_name(input); + + std::vector> head(7, nullptr); + head[0] = std::make_shared(input, input); + head[1] = std::make_shared(ov::element::i32, ov::Shape{1}, std::vector{2}); + head[2] = std::make_shared(head[0], head[1], true); + head[3] = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, 4, 10}); + head[4] = std::make_shared(ov::element::i64, ov::Shape{3}, std::vector{1, 1, 40}); + head[5] = std::make_shared(head[2], head[3], false); + head[6] = std::make_shared(head[5], head[4], false); + + for (const auto& h : head) { + m_nodes.push_back(h); + set_name(h); + } + + // Generate repeated blocks + std::shared_ptr output = get_block(head[6]); + std::vector> outputs; + outputs.push_back(output); + + for (size_t i = 0; i < 9; ++i) { + output = get_block(output); + outputs.push_back(output); + } + + // Generate tail + std::vector> tail(6, nullptr); + tail[0] = std::make_shared(outputs, -1); + tail[1] = std::make_shared(ov::element::i32, ov::Shape{3}, std::vector{1, 20, 20}); + tail[2] = std::make_shared(tail[0], tail[1], false); + tail[3] = std::make_shared(ov::element::i32, ov::Shape{1, 1, 1}); + tail[4] = std::make_shared(tail[2], tail[3]); + tail[5] = std::make_shared(tail[4], tail[4]); + + for (const auto& t : tail) { + m_nodes.push_back(t); + set_name(t); + } + + // Create model + auto result = std::make_shared(tail[5]); + m_nodes.push_back(result); + set_name(result); + + ov::ParameterVector params = {input}; + ov::ResultVector results = {result}; + + return std::make_shared(results, params); + } + + std::shared_ptr get_block(const std::shared_ptr& input) { + // Parameters + // input + + // Constants + std::vector> model_c(18, nullptr); + model_c[0] = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{0, 2, 1, 3}); + model_c[1] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{1}); + model_c[2] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{0}); + model_c[3] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{2}); + model_c[4] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{0}); + model_c[5] = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, 1, 1}); + model_c[6] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{1}); + model_c[7] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{0}); + model_c[8] = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, 1, 1}); + model_c[9] = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{1, 1, 1, 2}); + model_c[10] = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{1, 1, 1, 1}); + model_c[11] = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{1, 1, 1, 2}); + model_c[12] = std::make_shared(ov::element::i32, ov::Shape{1, 1, 1, 1}); + model_c[13] = std::make_shared(ov::element::i32, ov::Shape{1, 1, 1, 1}); + model_c[14] = std::make_shared(ov::element::i32, ov::Shape{1, 1, 1, 
1}); + model_c[15] = std::make_shared(ov::element::f32, ov::Shape{40, 40}); + model_c[16] = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, 4, 10}); + model_c[17] = std::make_shared(ov::element::i32, ov::Shape{3}, std::vector{1, 1, 40}); + + for (const auto& c : model_c) { + m_nodes.push_back(c); + set_name(c); + } + + // Converts + std::vector> convert(3, nullptr); + convert[0] = std::make_shared(model_c[15], ov::element::f16); + convert[1] = std::make_shared(convert[0], ov::element::i32); + convert[2] = std::make_shared(model_c[12], ov::element::i32); + + for (const auto& c : convert) { + m_nodes.push_back(c); + set_name(c); + } + + // Ops + std::vector> op(16, nullptr); + op[0] = std::make_shared(input, convert[1], false, true); + op[1] = std::make_shared(op[0], model_c[16], false); + op[2] = std::make_shared(op[1], model_c[0]); + op[3] = std::make_shared(op[2]); + op[4] = std::make_shared(op[3], model_c[1], model_c[2]); + op[5] = std::make_shared(op[4], model_c[3], true); + op[6] = std::make_shared(op[5]); + op[7] = std::make_shared(model_c[5], model_c[6], op[6], model_c[7]); + op[8] = std::make_shared(op[2], + model_c[8], + op[7], + model_c[9], + std::vector{1, 1, 1, 1}, + std::vector{1, 1, 1, 1}); + op[9] = std::make_shared(op[2], + op[7], + model_c[10], + model_c[11], + std::vector{1, 1, 1, 1}, + std::vector{1, 1, 1, 1}); + op[10] = std::make_shared(op[9], convert[2]); + op[11] = std::make_shared(std::vector>{op[10], op[8]}, -1); + op[12] = std::make_shared(model_c[13], op[11]); + op[13] = std::make_shared(model_c[14], op[2]); + op[14] = std::make_shared(op[13], op[12]); + op[15] = std::make_shared(op[14], model_c[17], false); + + for (const auto& o : op) { + m_nodes.push_back(o); + set_name(o); + } + + return op[15]; + } + +private: + void set_name(const std::shared_ptr& node) { + node->set_friendly_name("node_" + std::to_string(m_name_idx++)); + } + + std::vector> m_nodes; + size_t m_name_idx; +}; + +TEST(OnlinePartitioningTest, Partitioning_IsTheSame_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto opt_desc = std::make_shared<::intel_npu::OptionsDesc>(); + auto cfg = ::intel_npu::Config(opt_desc); + ::intel_npu::registerNPUWOptions(*opt_desc); + std::map cfg_map = {{ "NPUW_ONLINE_KEEP_BLOCK_SIZE", "9" }}; + cfg.update(cfg_map); + + auto ens = ov::npuw::online::buildPartitioning(model, cfg); + + for (size_t i = 0; i < 100; ++i) { + auto ens_again = ov::npuw::online::buildPartitioning(model, cfg); + EXPECT_TRUE(isEqualEns(ens, ens_again)); + } +} + +TEST(OnlinePartitioningTest, Partitioning_IsTheSame_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto opt_desc = std::make_shared<::intel_npu::OptionsDesc>(); + auto cfg = ::intel_npu::Config(opt_desc); + ::intel_npu::registerNPUWOptions(*opt_desc); + std::map cfg_map = {{ "NPUW_ONLINE_KEEP_BLOCK_SIZE", "9" }}; + cfg.update(cfg_map); + + auto ens = ov::npuw::online::buildPartitioning(model, cfg); + + for (size_t i = 0; i < 100; ++i) { + auto ens_again = ov::npuw::online::buildPartitioning(model, cfg); + EXPECT_TRUE(isEqualEns(ens, ens_again)); + } +} + +TEST(OnlinePartitioningTest, Partitioning_SingleGroup_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->singleGroup(); + EXPECT_EQ(snap->graphSize(), 1); +} + +TEST(OnlinePartitioningTest, Partitioning_SingleGroup_RepeatedModel) { + ModelGenerator mg; + auto model = 
mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->singleGroup(); + EXPECT_EQ(snap->graphSize(), 1); +} + +TEST(OnlinePartitioningTest, Partitioning_buildGraph_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + auto g = snap->getGraph(); + for (const auto& nh : g->sorted()) { + ov::npuw::online::Group::GPtr group = g->meta(nh).get(); + EXPECT_EQ(group->size(), 1); + } + EXPECT_EQ(snap->getNodeToGroupMap()->size(), snap->graphSize()); +} + +TEST(OnlinePartitioningTest, Partitioning_buildGraph_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + auto g = snap->getGraph(); + for (const auto& nh : g->sorted()) { + ov::npuw::online::Group::GPtr group = g->meta(nh).get(); + EXPECT_EQ(group->size(), 1); + } + EXPECT_EQ(snap->getNodeToGroupMap()->size(), snap->graphSize()); +} + +TEST(OnlinePartitioningTest, Partitioning_earlyAvoids_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + ov::npuw::online::PassContext ctx; + ctx.avoids = {{ov::npuw::online::PatternType::OP, "Gather", "mydevice"}, {ov::npuw::online::PatternType::OP, "MatMul", "mydevice"}}; + snap->setCtx(ctx); + snap->buildGraph(); + snap->earlyAvoids(); + auto g = snap->getGraph(); + size_t count = 0; + for (const auto& nh : g->sorted()) { + ov::npuw::online::Group::GPtr group = g->meta(nh).get(); + EXPECT_EQ(group->size(), 1); + if (group->avoidedTargets().size() == 1 && *(group->avoidedTargets().begin()) == "mydevice") { + ++count; + } + } + EXPECT_EQ(count, 2); +} + +TEST(OnlinePartitioningTest, Partitioning_earlyAvoids_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + ov::npuw::online::PassContext ctx; + ctx.avoids = {{ov::npuw::online::PatternType::OP, "Gather", "mydevice"}, {ov::npuw::online::PatternType::OP, "MatMul", "mydevice"}}; + snap->setCtx(ctx); + snap->buildGraph(); + snap->earlyAvoids(); + auto g = snap->getGraph(); + size_t count = 0; + for (const auto& nh : g->sorted()) { + ov::npuw::online::Group::GPtr group = g->meta(nh).get(); + EXPECT_EQ(group->size(), 1); + if (group->avoidedTargets().size() == 1 && *(group->avoidedTargets().begin()) == "mydevice") { + ++count; + } + } + EXPECT_EQ(count, 20); +} + +TEST(OnlinePartitioningTest, Partitioning_collectLHF_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {10, 10}; + size_t iter = 0; + + snap->repeat([&]{ + snap->collectLHF(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_collectLHF_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {82, 82}; + size_t iter = 0; + + snap->repeat([&]{ + snap->collectLHF(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_fuseRemnants_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {10, 10}; + 
size_t iter = 0; + + snap->repeat([&]{ + snap->fuseRemnants(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_fuseRemnants_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {75, 38, 19, 10}; + size_t iter = 0; + + snap->repeat([&]{ + snap->fuseRemnants(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_fuseRemnantsExtended_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {10, 10}; + size_t iter = 0; + + snap->repeat([&]{ + snap->fuseRemnantsExtended(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_fuseRemnantsExtended_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {10, 10}; + size_t iter = 0; + + snap->repeat([&]{ + snap->fuseRemnantsExtended(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_fuseInputs_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {15, 14, 14}; + size_t iter = 0; + + snap->repeat([&]{ + snap->fuseInputs(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_fuseInputs_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {148, 138, 138}; + size_t iter = 0; + + snap->repeat([&]{ + snap->fuseInputs(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_Compiler_Just_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes_lhf = {10, 10}; + size_t iter_lhf = 0; + + std::vector sizes_fr = {10, 10}; + size_t iter_fr = 0; + + snap->repeat([&] { + snap->collectLHF(); + EXPECT_LT(iter_lhf, sizes_lhf.size()); + EXPECT_EQ(snap->graphSize(), sizes_lhf[iter_lhf++]); + }); + snap->repeat([&] { + snap->fuseRemnants(); + EXPECT_LT(iter_fr, sizes_fr.size()); + EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_Compiler_Just_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes_lhf = {82, 82}; + size_t iter_lhf = 0; + + std::vector sizes_fr = {41, 21, 11, 10, 10}; + size_t iter_fr = 0; + + snap->repeat([&] { + snap->collectLHF(); + EXPECT_LT(iter_lhf, sizes_lhf.size()); + EXPECT_EQ(snap->graphSize(), sizes_lhf[iter_lhf++]); + }); + snap->repeat([&] { + snap->fuseRemnants(); + EXPECT_LT(iter_fr, sizes_fr.size()); + EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_Compiler_RepeatedBlocks_SmallModel) { + ModelGenerator 
mg;
+    auto model = mg.get_model_without_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+
+    std::vector<size_t> sizes_fr = {10, 10};
+    size_t iter_fr = 0;
+
+    snap->earlyAvoids();
+    snap->earlyRegroup();
+    snap->repeatedBlocks();
+    EXPECT_EQ(snap->graphSize(), 17);
+
+    auto matches = snap->getMatches();
+    EXPECT_EQ(matches.size(), 0);
+
+    snap->repeat([&] {
+        snap->fuseRemnantsExtended();
+        EXPECT_LT(iter_fr, sizes_fr.size());
+        EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_Compiler_RepeatedBlocks_RepeatedModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_with_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+
+    std::vector<size_t> sizes_fr = {12, 12};
+    size_t iter_fr = 0;
+
+    snap->earlyAvoids();
+    snap->earlyRegroup();
+    snap->repeatedBlocks();
+    EXPECT_EQ(snap->graphSize(), 18);
+
+    auto matches = snap->getMatches();
+    EXPECT_EQ(matches.size(), 1);
+
+    for (const auto& m : matches) {
+        EXPECT_EQ(m.second.size(), 17);
+        for (const auto& layers : m.second) {
+            EXPECT_EQ(layers.size(), 10);
+        }
+    }
+
+    snap->repeat([&] {
+        snap->fuseRemnantsExtended();
+        EXPECT_LT(iter_fr, sizes_fr.size());
+        EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_Compiler_Compute_SmallModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_without_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+
+    std::vector<size_t> sizes_fr = {10, 10};
+    size_t iter_fr = 0;
+
+    ov::npuw::online::PassContext ctx;
+    ctx.isolates = {{ov::npuw::online::PatternType::OP, "Transpose", "test_compute"},
+                    {ov::npuw::online::PatternType::OP, "ScatterUpdate", "test_compute"}};
+    ctx.nofolds = {"test_compute"};
+    snap->setCtx(ctx);
+
+    snap->buildGraph();
+    snap->earlyAvoids();
+    snap->earlyRegroup();
+    snap->repeatedBlocks();
+    EXPECT_EQ(snap->graphSize(), 17);
+
+    auto matches = snap->getMatches();
+    EXPECT_EQ(matches.size(), 0);
+
+    snap->repeat([&] {
+        snap->fuseRemnantsExtended();
+        EXPECT_LT(iter_fr, sizes_fr.size());
+        EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_Compiler_Compute_RepeatedModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_with_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+
+    std::vector<size_t> sizes_fr = {10, 10};
+    size_t iter_fr = 0;
+
+    ov::npuw::online::PassContext ctx;
+    ctx.isolates = {{ov::npuw::online::PatternType::OP, "Gather", "test_compute"},
+                    {ov::npuw::online::PatternType::OP, "ScatterUpdate", "test_compute"},
+                    {ov::npuw::online::PatternType::OP, "ShapeOf", "test_compute"},
+                    {ov::npuw::online::PatternType::OP, "Divide", "test_compute"},
+                    {ov::npuw::online::PatternType::OP, "Floor", "test_compute"}};
+    ctx.nofolds = {"test_compute"};
+    snap->setCtx(ctx);
+
+    snap->buildGraph();
+    snap->earlyAvoids();
+    snap->earlyRegroup();
+    snap->repeatedBlocks();
+    EXPECT_EQ(snap->graphSize(), 29);
+
+    // FIXME: create a config in which there will be repeated blocks
+    auto matches = snap->getMatches();
+    EXPECT_EQ(matches.size(), 0);
+
+    snap->repeat([&] {
+        snap->fuseRemnantsExtended();
+        EXPECT_LT(iter_fr, sizes_fr.size());
+        EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]);
+    });
+}
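The repeat() loops in the tests above drive each pass to a fixpoint: the sizes/sizes_fr vectors list the group-graph size expected after every iteration. A small self-contained model of that contract (the stopping rule and the kMinGroups = 10 floor are assumptions inferred from the expected size sequences, not the npuw implementation):

    #include <cstddef>
    #include <functional>

    struct GroupGraph {
        std::size_t size;  // number of groups in the partitioning graph
        static constexpr std::size_t kMinGroups = 10;
        // Re-run a fusion pass until it stops making progress or the
        // group count reaches the lower bound.
        void repeat(const std::function<void()>& pass) {
            std::size_t last;
            do {
                last = size;
                pass();  // the test lambdas also assert the size here
            } while (size != last && size > kMinGroups);
        }
    };

    int main() {
        GroupGraph g{150};
        // A halving pass: the callback observes sizes 75, 38, 19, 10 --
        // the same shape as {75, 38, 19, 10} in Partitioning_fuseRemnants_RepeatedModel.
        g.repeat([&g] { g.size = (g.size + 1) / 2; });
        return static_cast<int>(g.size);
    }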
diff --git a/src/plugins/intel_npu/tests/unit/npuw/unpack.cpp b/src/plugins/intel_npu/tests/unit/npuw/unpack.cpp
new file mode 100644
index 00000000000000..1049832f6ead7c
--- /dev/null
+++ b/src/plugins/intel_npu/tests/unit/npuw/unpack.cpp
@@ -0,0 +1,103 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifdef HAVE_AVX2
+#include "unpack.hpp"
+
+namespace {
+
+const auto TestCases = ::testing::Combine(
+    ::testing::ValuesIn({ov::element::Type_t::i4}),
+    ::testing::ValuesIn({ov::element::Type_t::i8, ov::element::Type_t::f16}),
+    ::testing::ValuesIn({ov::element::Type_t::undefined}),  // not used in this test
+    ::testing::ValuesIn({ov::element::Type_t::undefined}),  // not used in this test
+    ::testing::ValuesIn({3lu, 0lu}),
+    ::details::ShapesIn({Tensors{input={1, 1, 1, 32};},
+                         Tensors{input={1, 1, 1, 128};},
+                         Tensors{input={1, 1, 1, 390};},
+                         Tensors{input={1, 1, 1, 82};}}),
+    ::testing::ValuesIn({true, false}),
+    ::testing::ValuesIn({true, false})
+);
+
+INSTANTIATE_TEST_SUITE_P(UnpackTests, UnpackTests,
+                         TestCases,
+                         UnpackTests::getTestCaseName);
+
+const auto TestCasesScale = ::testing::Combine(
+    ::testing::ValuesIn({ov::element::Type_t::i4}),  // TODO: add i8 as input for test
+    ::testing::ValuesIn({ov::element::Type_t::f16, ov::element::Type_t::f32}),
+    ::testing::ValuesIn({ov::element::Type_t::f16, ov::element::Type_t::f32}),
+    ::testing::ValuesIn({ov::element::Type_t::undefined}),  // not used in this test
+    ::testing::ValuesIn({3lu, 0lu}),
+    ::details::ShapesIn({Tensors{input={1, 32, 128}; scale = {1, 32, 1};},
+                         Tensors{input={32, 128}; scale = {32, 1};},
+                         Tensors{input={64, 160}; scale = {64, 1};},
+                         Tensors{input={1024, 4}; scale = {64, 1};},
+                         Tensors{input={1, 1, 1024, 4}; scale = {1, 1, 64, 1};}}),
+    ::testing::ValuesIn({true, false}),
+    ::testing::ValuesIn({true, false})
+);
+
+INSTANTIATE_TEST_SUITE_P(UnpackWithScaleTests, UnpackWithScaleTests,
+                         TestCasesScale,
+                         UnpackWithScaleTests::getTestCaseName);
+
+const auto TestCasesScaleAndZeroPoints = ::testing::Combine(
+    ::testing::ValuesIn({ov::element::Type_t::u4}),
+    ::testing::ValuesIn({ov::element::Type_t::f16}),
+    ::testing::ValuesIn({ov::element::Type_t::f16}),
+    ::testing::ValuesIn({ov::element::Type_t::u4}),
+    ::testing::ValuesIn({3lu, 0lu}),
+    ::details::ShapesIn({Tensors{input={1, 32, 128}; scale = {1, 32, 1};},
+                         Tensors{input={1, 64, 160}; scale = {1, 64, 1};},
+                         Tensors{input={1, 1024, 4}; scale = {1, 64, 1};},
+                         Tensors{input={1, 1, 1024, 4}; scale = {1, 1, 64, 1};},
+                         Tensors{input={64, 1}; scale = {64, 1};}}),
+    ::testing::ValuesIn({true, false}),
+    ::testing::ValuesIn({true, false})
+);
+
+INSTANTIATE_TEST_SUITE_P(UnpackTestsWithScaleAndZeroPoint, UnpackTestsWithScaleAndZeroPoint,
+                         TestCasesScaleAndZeroPoints,
+                         UnpackTestsWithScaleAndZeroPoint::getTestCaseName);
+
+const auto TestCasesScaleAndZeroPoints2 = ::testing::Combine(
+    ::testing::ValuesIn({ov::element::Type_t::u4}),
+    ::testing::ValuesIn({ov::element::Type_t::f16}),
+    ::testing::ValuesIn({ov::element::Type_t::f32}),
+    ::testing::ValuesIn({ov::element::Type_t::f32}),
+    ::testing::ValuesIn({3lu, 0lu}),
+    ::details::ShapesIn({Tensors{input={32, 32, 64}; scale = {32, 1, 64};},
+                         Tensors{input={64, 64, 128}; scale = {64, 1, 128};},
+                         Tensors{input={64, 32, 32}; scale = {64, 1, 32};}}),
+    ::testing::ValuesIn({true, false}),
+    ::testing::ValuesIn({true, false})
+);
+
+INSTANTIATE_TEST_SUITE_P(UnpackTestsWithScaleAndZeroPointTest2, UnpackTestsWithScaleAndZeroPointTest2,
+                         TestCasesScaleAndZeroPoints2,
+                         UnpackTestsWithScaleAndZeroPointTest2::getTestCaseName);
+
+const auto TestCasesScaleAndZeroPoints3 = ::testing::Combine(
+    ::testing::ValuesIn({ov::element::Type_t::u4}),
+    ::testing::ValuesIn({ov::element::Type_t::f16}),
+    ::testing::ValuesIn({ov::element::Type_t::f16}),
+    
::testing::ValuesIn({ov::element::Type_t::u4}), + ::testing::ValuesIn({3lu, 0lu}), + ::details::ShapesIn({Tensors{input={1, 32, 128}; scale = {1, 32, 1}; zerop = {1, 32, 1};}, + Tensors{input={16, 64, 64}; scale = {16, 64, 1}; zerop = {16, 64, 1};}, + Tensors{input={1, 1024, 4}; scale = {1, 64, 1}; zerop = {1, 32, 1};}}), + ::testing::ValuesIn({true, false}), + ::testing::ValuesIn({true, false}) +); + +INSTANTIATE_TEST_SUITE_P(UnpackTestsWithScaleAndZeroPointTest3, UnpackTestsWithScaleAndZeroPointTest3, + TestCasesScaleAndZeroPoints3, + UnpackTestsWithScaleAndZeroPointTest3::getTestCaseName); + +} // anonymous namespace + +#endif // __AVX2__ diff --git a/src/plugins/intel_npu/tests/unit/npuw/unpack.hpp b/src/plugins/intel_npu/tests/unit/npuw/unpack.hpp new file mode 100644 index 00000000000000..da5bb4e4720f3e --- /dev/null +++ b/src/plugins/intel_npu/tests/unit/npuw/unpack.hpp @@ -0,0 +1,628 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include +#include + +#include "openvino/runtime/make_tensor.hpp" + +#include "util.hpp" + +namespace { + +#define ASSERT_NO_THROW_WITH_MESSAGE(code) do{ \ + try {\ + code;\ + }catch (const std::exception &ex ) {\ + FAIL()<> 4) | ((x & (1 << 6)) >> 4) | ((x & (1 << 5)) >> 4) | ((x & (1 << 4)) >> 4); +} + +inline int8_t lo4(int8_t x) { + return (x & (1 << 3)) | (x & (1 << 2)) | (x & (1 << 1)) | (x & (1 << 0)); +} + +inline uint8_t hi4(uint8_t x) { + return x >> 4; +} + +inline uint8_t lo4(uint8_t x) { + return x & 0x0F; +} + +inline int8_t upc(int8_t h) { + return h | (-((h & (1 << 3)) >> 3) & (-8)); +} + +typedef unsigned short ushort; +typedef unsigned int uint; + +float half_to_float(const ushort x) { + + __m128i halfVector = _mm_cvtsi32_si128(x); + __m128 floatVector = _mm_cvtph_ps(halfVector); + return _mm_cvtss_f32(floatVector); +} + +ushort float_to_half(const float x) { + __m128 floatVector = _mm_set_ss(x); + __m128i halfVector = _mm_cvtps_ph(floatVector, _MM_FROUND_TO_NEAREST_INT); + return _mm_extract_epi16(halfVector, 0); +} + +inline uint16_t int2hfloat(int8_t x) +{ + float inputFl32 = static_cast(x); + float* inputFl32_ptr = &inputFl32; + unsigned int* fltInt32Ptr = reinterpret_cast(inputFl32_ptr); + unsigned int fltInt32 = *fltInt32Ptr; + unsigned short fltInt16; + + fltInt16 = (fltInt32 >> 31) << 5; + unsigned short tmp = (fltInt32 >> 23) & 0xff; + tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27); + fltInt16 = (fltInt16 | tmp) << 10; + fltInt16 |= (fltInt32 >> 13) & 0x3ff; + + return fltInt16; +} + + +void unpack(const int8_t* in, int8_t* out, int size) { + for (int i = 0; i < size / 2; i++) { + *(out++) = upc(lo4(*in)); + *(out++) = upc(hi4(*in)); + in++; + } +} + +void unpack_i4f16(const int8_t* in, int8_t* out, int size) { + uint16_t *hFloatOut = reinterpret_cast(out); + + for (int i = 0; i < size / 2; i++) { + *(hFloatOut++) = int2hfloat(upc(lo4(*in))); + *(hFloatOut++) = int2hfloat(upc(hi4(*in))); + in++; + } +} + +/*u4 order*/ +void unpack_u4f32(const int8_t* in, float* out, int size) { + for (int i = 0; i < size / 2; i++) { + *(out++) = static_cast(lo4(*in)); + *(out++) = static_cast(hi4(*in)); + in++; + } +} + +template +::testing::AssertionResult fp16ArraysMatch(const T &actual, + const T &expected, + const T &i4Input, + bool int4 = 1 /*i4 or u4*/){ + for (size_t i = 0; i < expected.size() / 2; ++i) { + + int int8Input[] ={ + details::lo4(i4Input[i / 2]), + details::hi4(i4Input[i / 2]) + }; + + if (int4) { + int8Input[0] = 
details::upc(int8Input[0]);
+            int8Input[1] = details::upc(int8Input[1]);
+        }
+
+        auto fp16ref = int{*((uint16_t*)expected.data() + i)};
+        auto fp16out = int{*((uint16_t*)actual.data() + i)};
+
+#define _P(x) std::dec << std::setw(5) << (x) << '(' << std::setw(4) << std::hex << (x) << ')'
+        if (fp16ref != fp16out) {
+            return ::testing::AssertionFailure() << std::dec << std::setw(4) << i << ", i4:"
+                                                 << std::setw(2) << int8Input[i % 2]
+                                                 << " | ref " << _P(fp16ref)
+                                                 << ", test " << _P(fp16out) << "\n";
+        }
+#undef _P
+
+    }
+
+    return ::testing::AssertionSuccess();
+}
+
+}  // namespace details
+
+using ShapesInitializer = std::function<void(std::vector<int>&, std::vector<int>&, std::vector<int>&)>;
+
+using UnpackTestsParams = std::tuple<
+        ov::element::Type_t,  // fromPrecision
+        ov::element::Type_t,  // toPrecision
+        ov::element::Type_t,  // scalePrecision
+        ov::element::Type_t,  // zeroPointPrecision
+        unsigned long,        // nPartitions
+        ShapesInitializer,    // input_shape, scale_shape, zerop initializer
+        bool,                 // use parallel_for
+        bool                  // strict partitioning
+        >;
+
+class UnpackTestsBase {
+protected:
+    ov::element::Type fromType;
+    ov::element::Type toType;
+    ov::element::Type scaleType;
+    ov::element::Type zeropType;
+    std::shared_ptr<ov::ITensor> from, to, scale, zerop;
+
+    std::vector<int8_t> input;
+    std::vector<int8_t> output;
+    std::vector<int8_t> ref_output;
+    std::vector<uint8_t> scalesStorage;
+    std::vector<uint8_t> zeropStorage;
+    float zeropValue;
+    ov::Shape input_shape;
+    ov::Shape scale_shape;
+    ov::Shape zerop_shape;
+
+    size_t nPartitions;
+    bool useParallelFor = false;
+    bool strictPartitions = false;
+
+    void make_zeropoints() {
+        if (zeropType == ov::element::undefined) {
+            return;
+        }
+
+        const std::vector<float> zeropValues = {15.0f, 12.0f, 0.0f, 31.0f};
+        const size_t nElements = shape_size(zerop_shape);
+
+        // Set zeropValue if there's only one element
+        if (nElements == 1) {
+            zeropValue = zeropValues.front();
+        }
+
+        // Determine the size of the storage based on the type and resize the storage vector
+        if (zeropType == ov::element::Type_t::u4) {
+            zeropStorage.resize((nElements + 1) / 2, 0);  // Each u4 zeropoint is 4 bits, so two zeropoints fit in one byte
+        } else if (zeropType == ov::element::Type_t::f32) {
+            zeropStorage.resize(nElements * sizeof(float), 0);
+        } else {
+            ASSERT_TRUE(zeropType == ov::element::u4 || zeropType == ov::element::f32);
+        }
+
+        // Fill the storage with the appropriate values
+        if (zeropType == ov::element::Type_t::u4) {
+            for (size_t i = 0; i < nElements; ++i) {
+                uint8_t zeropValueU4 = static_cast<uint8_t>(zeropValues[i % zeropValues.size()]) & 0x0F;
+                size_t byteIndex = i / 2;
+                if (i % 2 == 0) {
+                    zeropStorage[byteIndex] = zeropValueU4;
+                } else {
+                    zeropStorage[byteIndex] |= (zeropValueU4 << 4);  // keep the low nibble written at i - 1
+                }
+            }
+        } else if (zeropType == ov::element::Type_t::f32) {
+            float* ptrWork = reinterpret_cast<float*>(zeropStorage.data());
+            for (size_t i = 0; i < nElements; ++i) {
+                ptrWork[i] = zeropValues[i % zeropValues.size()];
+            }
+        }
+
+        // Create the tensor
+        zerop = ov::make_tensor(zeropType, zerop_shape, zeropStorage.data());
+    }
+
+    void make_scales() {
+        if (scaleType == ov::element::undefined) {
+            return;
+        }
+        ASSERT_TRUE(scaleType == ov::element::f16 || scaleType == ov::element::f32);
+        size_t nElements = shape_size(scale_shape);
+
+        // creating custom scale factors
+        const size_t nScaleBytes = scaleType.bitwidth() * nElements / 8;
+
+        std::vector<float> sc(nElements);
+        float coeffTable[] = {
+            0.1f,
+            0.5f,
+            1.f,
+            2.f
+        };
+        for (size_t i = 0; i != nElements; i++) {
+            sc[i] = coeffTable[i % (sizeof(coeffTable) / sizeof(*coeffTable))];
+        }
+        
scalesStorage.resize(nScaleBytes); + + if (scaleType == ov::element::f16) { + uint16_t * ptrWork = reinterpret_cast(scalesStorage.data()); + for (size_t i = 0; i != nElements; i++) { + ptrWork[i] = details::float_to_half(sc[i]); + } + } + if (scaleType == ov::element::f32) { + float* ptrWork = reinterpret_cast(scalesStorage.data()); + for (size_t i = 0; i != nElements; i++) { + ptrWork[i] = sc[i]; + } + } + scale = ov::make_tensor(scaleType, scale_shape, scalesStorage.data()); + } + + void make_input() { + + size_t nElements = shape_size(input_shape); + + ASSERT_EQ((fromType.bitwidth() * nElements) % 8, 0) << "Input len has to be byte boundary aligned, but was " + << fromType.bitwidth() * nElements << " bits"; + ASSERT_EQ((toType.bitwidth() * nElements) % 8, 0) << "Output len has to be byte boundary aligned"; + + const size_t nInputBytes = fromType.bitwidth() * nElements / 8; + const size_t nOutputBytes = toType.bitwidth() * nElements / 8; + + input.resize(nInputBytes); + ref_output.resize(nOutputBytes); + output.resize(nOutputBytes); + std::fill(ref_output.begin(), ref_output.end(), 0); + std::fill(output.begin(), output.end(), 0); + + std::array input_local = { + 0x0A, 0x0B, 0x1C, 0x1D, 0x2E, 0x2F, 0x35, 0x36, + 0x4A, 0x4B, 0x5A, 0x5B, 0x6A, 0x6B, 0x7A, 0x7B, + 0x0C, 0x0D, 0x1C, 0x1D, 0x2C, 0x2D, 0x3C, 0x3D, + 0x4C, 0x4D, 0x5C, 0x5D, 0x6C, 0x6D, 0x7C, 0x7D, + }; + + for (size_t idx = 0, k = 0; k < nInputBytes; k++, idx = (idx + 1) % input_local.size()) { + input[k] = input_local[idx]; + } + + from = ov::make_tensor(fromType, input_shape, input.data()); + to = ov::make_tensor(toType, input_shape, output.data()); + } +public: + void SetUp(const UnpackTestsParams & getParam) { + ShapesInitializer shapeInit; + + std::tie(fromType, toType, scaleType, zeropType, nPartitions, shapeInit, useParallelFor, strictPartitions) = getParam; + + std::vector input, scale, zerop; + shapeInit(input, scale, zerop); + + input_shape = ov::Shape{input.begin(), input.end()}; + scale_shape = ov::Shape{scale.begin(), scale.end()}; + if (zerop.empty()) { + zerop_shape = ov::Shape({1}); + } else { + zerop_shape = ov::Shape{zerop.begin(), zerop.end()}; + } + + make_input(); + make_scales(); + make_zeropoints(); + + make_ref_output(); + } + std::string ToString() const { + std::ostringstream result; + result << (isNegative() ? "NEGATIVE_" : "") + <<"["; + + for (size_t i = 0; i != input_shape.size(); i++) { + result << input_shape[i] << ((i + 1 == input_shape.size()) ? "" : "x"); + } + result <<"]" + << "_p" << nPartitions + << (strictPartitions ? "_SP" : "") + << (useParallelFor ? 
"_parallel" : "_serial") + << "_from_" << fromType + << "_to_" << toType; + if (scaleType != ov::element::Type_t::undefined) + result << "_scale_" << scaleType; + if (zeropType != ov::element::Type_t::undefined) + result << "_zerop_" << zeropType; + + return result.str(); + } + + /** + * Negative test cases has to be carefully reviewed, to still remain positive runs at some points + * @return + */ + virtual bool isNegative() const { + return false; + } + + virtual void make_ref_output() { + size_t nElements = 1; + for (size_t dim : input_shape) { + nElements *= dim; + } + if (toType == ov::element::i8) { + details::unpack(input.data(), ref_output.data(), static_cast(nElements)); + } else if (toType == ov::element::f16) { + details::unpack_i4f16(input.data(), ref_output.data(), static_cast(nElements)); + } + } +}; + +template +class UnpackTestsTmpl : + public ::testing::Test, + public T, + public ::testing::WithParamInterface { +protected: + + void SetUp() override { + T::SetUp(GetParam()); + } +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + T _bt; + _bt.SetUp(obj.param); + return _bt.ToString(); + } +}; + +using UnpackTests = UnpackTestsTmpl; +class UnpackTestsRef : public UnpackTests {}; + +TEST_P(UnpackTests, i4) { + ASSERT_NO_THROW_WITH_MESSAGE(ov::npuw::util::unpack(from, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions})); + ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input)); +} + +class UnpackWithScaleTestsBase : public UnpackTestsBase { +protected: + bool isNegative() const override { + if (scale_shape.size() != 3 && scale_shape.size() != 2) return true; + if (input_shape.back() % 64) return true; + if ((from->get_size() / scale->get_size()) % 64) return true; + if (toType != ov::element::f16) return true; + + return false; + } + + void make_ref_output() override { + if (isNegative()) return; + + size_t nElements = from->get_size(); + + const size_t nOutputElementsPerScale = ref_output.size() / (toType.bitwidth() / 8) / scale->get_size(); + + details::unpack_i4f16(input.data(), ref_output.data(), static_cast(nElements)); + + // lets apply per channel scale + uint16_t * pRef = reinterpret_cast(ref_output.data()); + uint16_t * pScale_f16 = reinterpret_cast(scale->data()); + float * pScale_f32 = reinterpret_cast(scale->data()); + + for (size_t i = 0; i < scale->get_size(); i++) { + for (size_t sc = 0; sc != nOutputElementsPerScale; sc++) { + float ref_scaled = details::half_to_float(pRef[0]); + if (scaleType == ov::element::f32) { + ref_scaled *= pScale_f32[0]; + } else if (scaleType == ov::element::f16) { + ref_scaled *= details::half_to_float(pScale_f16[0]); + } + *pRef = details::float_to_half(ref_scaled); + pRef++; + } + pScale_f32++; + pScale_f16++; + } + } + +}; + +using UnpackWithScaleTests = UnpackTestsTmpl; + + +TEST_P(UnpackWithScaleTests, i4_scale) { + ASSERT_NO_THROW_IF(!isNegative(), + ov::npuw::util::unpack(from, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions})); + if (!isNegative()) { + ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input)); + } +} + + +class UnpackTestsWithScaleAndZeroPointBase : public UnpackTestsBase { +protected: + bool isNegative() const override { + if (scale_shape.size() != 3 && scale_shape.size() != 2) return true; + if (input_shape.back() % 64) return true; + + return false; + } + + void make_ref_output() override { + if (isNegative()) return; + + size_t nElements = from->get_size(); + + const size_t 
nOutputElementsPerScale = ref_output.size() / (toType.bitwidth() / 8) / scale->get_size(); + + std::vector floatRef(nElements); + details::unpack_u4f32(input.data(), floatRef.data(), static_cast(nElements)); + + + // lets apply per channel scale + uint16_t * pRef = reinterpret_cast(ref_output.data()); + float * pFloatRef = reinterpret_cast(floatRef.data()); + const uint16_t * pScale_f16 = reinterpret_cast(scale->data()); + const float * pScale_f32 = reinterpret_cast(scale->data()); + + for (size_t i = 0; i < scale->get_size(); i++) { + for (size_t sc = 0; sc != nOutputElementsPerScale; sc++) { + // applying zeropoint + float ref_scaled = *pFloatRef - zeropValue; + + if (scaleType == ov::element::f32) { + ref_scaled *= pScale_f32[0]; + } else if (scaleType == ov::element::f16) { + ref_scaled *= details::half_to_float(pScale_f16[0]); + } + *pRef = details::float_to_half(ref_scaled); + + pFloatRef++; + pRef++; + } + pScale_f32++; + pScale_f16++; + } + } +}; + +using UnpackTestsWithScaleAndZeroPoint = UnpackTestsTmpl; + +TEST_P(UnpackTestsWithScaleAndZeroPoint, u4) { + ASSERT_NO_THROW_IF(!isNegative(), + ov::npuw::util::unpack(from, zerop, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions})); + if (!isNegative()) { + ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input, false)); + } +} + +class UnpackTestsWithScaleAndZeroPoint2 : public UnpackTestsWithScaleAndZeroPointBase { +protected: + bool isNegative() const override { + if (input_shape.back() % 64 || input_shape.size() != 3) return true; + if (scale_shape.back() % 64 || scale_shape.size() != 3) return true; + + return false; + } + + void make_ref_output() override { + if (isNegative()) return; + + size_t nElements = from->get_size(); + const auto from_shape = from->get_shape(); + + const size_t C = from_shape[from_shape.size() - 3]; + const size_t H = from_shape[from_shape.size() - 2]; + const size_t W = from_shape[from_shape.size() - 1]; + + std::vector floatRef(nElements); + details::unpack_u4f32(input.data(), floatRef.data(), static_cast(nElements)); + + uint16_t * pRef = reinterpret_cast(ref_output.data()); + float * pFloatRef = reinterpret_cast(floatRef.data()); + const uint16_t * pScale_f16 = reinterpret_cast(scale->data()); + const float * pScale_f32 = reinterpret_cast(scale->data()); + + for (size_t c = 0; c < C; ++c) { + for (size_t h = 0; h < H; ++h) { + for (size_t w = 0; w < W; ++w) { + size_t input_index = w + W * h + W * H * c; + size_t scale_index = w + W * c; + float ref_scaled = pFloatRef[input_index] - zeropValue; + if (scaleType == ov::element::f32) { + ref_scaled *= pScale_f32[scale_index]; + } else if (scaleType == ov::element::f16) { + ref_scaled *= details::half_to_float(pScale_f16[scale_index]); + } + pRef[w + W * h + c * W * H] = details::float_to_half(ref_scaled); + } + } + } + } +}; + +using UnpackTestsWithScaleAndZeroPointTest2 = UnpackTestsTmpl; + +TEST_P(UnpackTestsWithScaleAndZeroPointTest2, u4) { + ASSERT_NO_THROW_IF(!isNegative(), + ov::npuw::util::unpack(from, zerop, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions})); + if (!isNegative()) { + ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input, false)); + } +} + +class UnpackTestsWithScaleAndZeroPoint3 : public UnpackTestsWithScaleAndZeroPointBase { +protected: + bool isNegative() const override { + if (scale_shape.size() != 3 || zerop_shape.size() != 3) return true; + if (input_shape[2] % 64 || input_shape.size() != 3) return true; + + return false; + } + + 
void make_ref_output() override {
+        if (isNegative()) return;
+
+        size_t nElements = from->get_size();
+
+        const size_t nOutputElementsPerScale = ref_output.size() / (toType.bitwidth() / 8) / scale->get_size();
+
+        std::vector<float> floatRef(nElements);
+        details::unpack_u4f32(input.data(), floatRef.data(), static_cast<int>(nElements));
+
+        // let's apply per-channel scale
+        uint16_t* pRef = reinterpret_cast<uint16_t*>(ref_output.data());
+        const uint8_t* pZer = static_cast<const uint8_t*>(zerop->data());
+        float* pFloatRef = reinterpret_cast<float*>(floatRef.data());
+        const uint16_t* pScale_f16 = reinterpret_cast<const uint16_t*>(scale->data());
+        const float* pScale_f32 = reinterpret_cast<const float*>(scale->data());
+
+        for (size_t i = 0; i < scale->get_size(); i++) {
+            float zeroPointValue = static_cast<float>((i % 2 == 0) ? details::lo4(pZer[i / 2]) : details::hi4(pZer[i / 2]));
+            for (size_t sc = 0; sc != nOutputElementsPerScale; sc++) {
+                // applying zeropoint
+                float ref_scaled = *pFloatRef - zeroPointValue;
+
+                if (scaleType == ov::element::f32) {
+                    ref_scaled *= pScale_f32[0];
+                } else if (scaleType == ov::element::f16) {
+                    ref_scaled *= details::half_to_float(pScale_f16[0]);
+                }
+                *pRef = details::float_to_half(ref_scaled);
+
+                pFloatRef++;
+                pRef++;
+            }
+            pScale_f32++;
+            pScale_f16++;
+        }
+    }
+};
+
+using UnpackTestsWithScaleAndZeroPointTest3 = UnpackTestsTmpl<UnpackTestsWithScaleAndZeroPoint3>;
+
+TEST_P(UnpackTestsWithScaleAndZeroPointTest3, u4) {
+    ASSERT_NO_THROW_IF(!isNegative(),
+                       ov::npuw::util::unpack(from, zerop, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions}));
+    if (!isNegative()) {
+        ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input, false));
+    }
+}
+
+#define Tensors [](std::vector<int>& input, std::vector<int>& scale, std::vector<int>& zerop)
+
+namespace details {
+::testing::internal::ParamGenerator<std::vector<ShapesInitializer>::value_type> ShapesIn(
+        const std::vector<ShapesInitializer>& container) {
+    return ::testing::ValuesIn(container.begin(), container.end());
+}
+
+}  // namespace details
+}  // anonymous namespace
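For reference, the Tensors macro above turns each Tensors{...}; literal in unpack.cpp into a shape-initializer lambda, which details::ShapesIn then feeds to gtest's ValuesIn. A self-contained sketch of the trick:

    #include <cassert>
    #include <functional>
    #include <vector>

    // Each Tensors{...} block expands to a lambda that fills the three shape
    // vectors by assignment; unset vectors (here, zerop) simply stay empty.
    #define Tensors [](std::vector<int>& input, std::vector<int>& scale, std::vector<int>& zerop)

    using ShapesInitializer = std::function<void(std::vector<int>&, std::vector<int>&, std::vector<int>&)>;

    int main() {
        ShapesInitializer init = Tensors{ input = {1, 32, 128}; scale = {1, 32, 1}; };
        std::vector<int> input, scale, zerop;
        init(input, scale, zerop);  // input == {1, 32, 128}, scale == {1, 32, 1}
        assert(input.size() == 3 && scale.size() == 3 && zerop.empty());
        return 0;
    }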