From 0ae44841dc164c9abd7a731bbfd35b95644cd19c Mon Sep 17 00:00:00 2001
From: Roman Kazantsev <roman.kazantsev@intel.com>
Date: Thu, 3 Oct 2024 15:44:09 +0400
Subject: [PATCH 1/4] [PT FE][GHA] Run PT FE layer tests on Ubuntu 24.04 with
 Python 3.12 and NumPy 2.X (#26886)

**Details:** Run PT FE layer tests on Ubuntu 24.04 with Python 3.12 and
NumPy 2.X.
In addition, this PR contains the following fixes:
- work around a sporadic bug on Windows that occurs when tests run in parallel
- support PT FE and TF FE layer tests on macOS x86
- address leftovers from code review

**Tickets:** 154003, 153800

---------

Signed-off-by: Kazantsev, Roman <roman.kazantsev@intel.com>
---
 .github/workflows/job_pytorch_layer_tests.yml | 39 +++++++------------
 .../workflows/job_tensorflow_layer_tests.yml  | 15 ++++---
 .github/workflows/linux_arm64.yml             |  6 +--
 .github/workflows/mac.yml                     |  6 +--
 .github/workflows/mac_arm64.yml               |  6 +--
 .github/workflows/ubuntu_22.yml               |  6 +--
 .github/workflows/ubuntu_24.yml               | 10 +++++
 .github/workflows/windows_vs2019_release.yml  |  6 +--
 .../pytorch_tests/test_bitwise_ops.py         |  6 ++-
 tests/requirements_pytorch                    | 26 +++++++------
 tests/requirements_tensorflow                 |  3 +-
 11 files changed, 68 insertions(+), 61 deletions(-)

diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml
index 88b41f983f7094..50942cf331ab72 100644
--- a/.github/workflows/job_pytorch_layer_tests.yml
+++ b/.github/workflows/job_pytorch_layer_tests.yml
@@ -7,10 +7,6 @@ on:
         description: 'Machine on which the tests would run'
         type: string
         required: true
-      shell:
-        description: "shell to override the default shell settings in the runner's operating system."
-        type: string
-        required: true
       container:
         description: 'JSON to be converted to the value of the "container" configuration for the job'
         type: string
@@ -20,12 +16,15 @@ on:
         description: 'Components that are affected by changes in the commit defined by the Smart CI Action'
         type: string
         required: true
+      python-version:
+        description: 'Python version to setup. E.g., "3.11"'
+        type: string
+        required: true
 
 permissions: read-all
 
 env:
   PIP_CACHE_PATH: /mount/caches/pip/linux
-  PYTHON_VERSION: '3.11'
 
 jobs:
   PyTorch_Layer_Tests:
@@ -35,7 +34,7 @@ jobs:
     container: ${{ fromJSON(inputs.container) }}
     defaults:
       run:
-        shell: ${{ inputs.shell }}
+        shell: ${{ contains(inputs.runner, 'win') && 'pwsh' || 'bash' }}
     env:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
       OPENVINO_REPO: ${{ github.workspace }}/openvino
@@ -55,12 +54,6 @@ jobs:
           name: openvino_tests
           path: ${{ env.INSTALL_TEST_DIR }}
 
-      - name: Download OpenVINO tokenizers extension
-        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
-        with:
-          name: openvino_tokenizers_wheel
-          path: ${{ env.INSTALL_DIR }}
-
       # Needed as ${{ github.workspace }} is not working correctly when using Docker
       - name: Setup Variables
         if: runner.os != 'Windows'
@@ -98,10 +91,10 @@ jobs:
           sparse-checkout-cone-mode: false
           path: 'openvino'
 
-      - name: Setup Python ${{ env.PYTHON_VERSION }}
+      - name: Setup Python ${{ inputs.python-version }}
         uses: ./openvino/.github/actions/setup_python
         with:
-          version: ${{ env.PYTHON_VERSION }}
+          version: ${{ inputs.python-version }}
           pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH || '' }}
           should-setup-pip-paths: ${{ runner.os == 'Linux' }}
           self-hosted-runner: ${{ runner.os == 'Linux' }}
@@ -112,9 +105,6 @@ jobs:
           # Install the core OV wheel
           python3 -m pip install ${INSTALL_DIR}/tools/openvino-*.whl
 
-          # Install the core OV Tokenizers wheel
-          python3 -m pip install ${INSTALL_DIR}/openvino_tokenizers-*.whl
-
       - name: Install OpenVINO Python wheels (Windows)
         if: runner.os == 'Windows'
         run: |
@@ -122,10 +112,6 @@ jobs:
           $ovCoreWheelPath=Get-ChildItem -Path ${{ env.INSTALL_DIR }}\tools -Filter openvino-*.whl | % { $_.FullName }
           python3 -m pip install "$ovCoreWheelPath"
 
-          # Find and install the core OV Tokenizers wheel
-          $ovCoreWheelPath=Get-ChildItem -Path ${{ env.INSTALL_DIR }} -Filter openvino_tokenizers-*.whl | % { $_.FullName }
-          python3 -m pip install "$ovCoreWheelPath"
-
       - name: Install Pytorch Layer tests dependencies
         run: |
           # pytorch test requirements
@@ -133,22 +119,25 @@ jobs:
 
       - name: PyTorch Layer Tests
         if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196
-        run: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests -n logical -m precommit --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml
+        # due to CVS-152795, parallel run is not possible on Windows
+        run: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests ${PARALLEL} -m precommit --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml
         env:
           TEST_DEVICE: CPU
           TEST_PRECISION: FP32
+          PARALLEL: ${{ runner.os == 'Windows' && ' ' || '-n logical'}}
 
       - name: PyTorch torch.export Layer Tests
-        if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287
+        if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' && runner.os != 'Windows' }} # Ticket: 126287
         run: |
-          python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests -n logical -m precommit_torch_export --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml
+          python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests ${PARALLEL} -m precommit_torch_export --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml
         env:
           TEST_DEVICE: CPU
           TEST_PRECISION: FP32
           PYTORCH_TRACING_MODE: EXPORT
+          PARALLEL: ${{ runner.os == 'Windows' && ' ' || '-n logical'}}
 
       - name: PyTorch torch.compile TORCHFX Layer Tests
-        if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.os != 'macOS' && runner.arch != 'ARM64' }} # Ticket: 126287
+        if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.os != 'macOS' && runner.arch != 'ARM64' && runner.os != 'Windows' }} # Ticket: 126287
         run: |
           python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests -m precommit_fx_backend --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml
         env:
diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml
index 0801010b86bde3..e8d7b51e14c02f 100644
--- a/.github/workflows/job_tensorflow_layer_tests.yml
+++ b/.github/workflows/job_tensorflow_layer_tests.yml
@@ -7,10 +7,6 @@ on:
         description: 'Machine on which the tests would run'
         type: string
         required: true
-      shell:
-        description: "shell to override the default shell settings in the runner's operating system."
-        type: string
-        required: true
       container:
         description: 'JSON to be converted to the value of the "container" configuration for the job'
         type: string
@@ -20,12 +16,15 @@ on:
         description: 'Components that are affected by changes in the commit defined by the Smart CI Action'
         type: string
         required: true
+      python-version:
+        description: 'Python version to setup. E.g., "3.11"'
+        type: string
+        required: true
 
 permissions: read-all
 
 env:
   PIP_CACHE_PATH: /mount/caches/pip/linux
-  PYTHON_VERSION: '3.11'
 
 jobs:
   TensorFlow_Layer_Tests:
@@ -35,7 +34,7 @@ jobs:
     container: ${{ fromJSON(inputs.container) }}
     defaults:
       run:
-        shell: ${{ inputs.shell }}
+        shell: ${{ contains(inputs.runner, 'win') && 'pwsh' || 'bash' }}
     env:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
       OPENVINO_REPO: ${{ github.workspace }}/openvino
@@ -98,10 +97,10 @@ jobs:
           sparse-checkout-cone-mode: false
           path: 'openvino'
 
-      - name: Setup Python ${{ env.PYTHON_VERSION }}
+      - name: Setup Python ${{ inputs.python-version }}
         uses: ./openvino/.github/actions/setup_python
         with:
-          version: ${{ env.PYTHON_VERSION }}
+          version: ${{ inputs.python-version }}
           pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH || '' }}
           should-setup-pip-paths: ${{ runner.os == 'Linux' }}
           self-hosted-runner: ${{ runner.os == 'Linux' }}
diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml
index 3506ca49846f45..e4e608f3aca6d4 100644
--- a/.github/workflows/linux_arm64.yml
+++ b/.github/workflows/linux_arm64.yml
@@ -173,19 +173,19 @@ jobs:
     uses: ./.github/workflows/job_tensorflow_layer_tests.yml
     with:
       runner: 'aks-linux-16-cores-arm'
-      shell: bash
       container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}'
       affected-components: ${{ needs.smart_ci.outputs.affected_components }}
+      python-version: '3.11'
 
   Pytorch_Layer_Tests:
     name: Pytorch Layer Tests
-    needs: [ Build, Docker, Smart_CI, Openvino_tokenizers ]
+    needs: [ Build, Docker, Smart_CI ]
     uses: ./.github/workflows/job_pytorch_layer_tests.yml
     with:
       runner: 'aks-linux-16-cores-arm'
-      shell: bash
       container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}'
       affected-components: ${{ needs.smart_ci.outputs.affected_components }}
+      python-version: '3.11'
 
   CPU_Functional_Tests:
     name: CPU functional tests
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index da3224fa483ad1..20db9de1776015 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -276,17 +276,17 @@ jobs:
     uses: ./.github/workflows/job_tensorflow_layer_tests.yml
     with:
       runner: 'macos-13'
-      shell: bash
       affected-components: ${{ needs.smart_ci.outputs.affected_components }}
+      python-version: '3.11'
 
   Pytorch_Layer_Tests:
     name: Pytorch Layer Tests
-    needs: [ Build, Smart_CI, Openvino_tokenizers ]
+    needs: [ Build, Smart_CI ]
     uses: ./.github/workflows/job_pytorch_layer_tests.yml
     with:
       runner: 'macos-13'
-      shell: bash
       affected-components: ${{ needs.smart_ci.outputs.affected_components }}
+      python-version: '3.11'
 
   CPU_Functional_Tests:
     name: CPU functional tests
diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml
index 331afc7266cd6a..a38179f71fb60c 100644
--- a/.github/workflows/mac_arm64.yml
+++ b/.github/workflows/mac_arm64.yml
@@ -275,17 +275,17 @@ jobs:
     uses: ./.github/workflows/job_tensorflow_layer_tests.yml
     with:
       runner: 'macos-13-xlarge'
-      shell: bash
       affected-components: ${{ needs.smart_ci.outputs.affected_components }}
+      python-version: '3.11'
 
   Pytorch_Layer_Tests:
     name: Pytorch Layer Tests
-    needs: [ Build, Smart_CI, Openvino_tokenizers ]
+    needs: [ Build, Smart_CI ]
     uses: ./.github/workflows/job_pytorch_layer_tests.yml
     with:
       runner: 'macos-13-xlarge'
-      shell: bash
       affected-components: ${{ needs.smart_ci.outputs.affected_components }}
+      python-version: '3.11'
 
   CPU_Functional_Tests:
     name: CPU functional tests
diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml
index 8f461391f20a9f..2c20e5136cfc4e 100644
--- a/.github/workflows/ubuntu_22.yml
+++ b/.github/workflows/ubuntu_22.yml
@@ -305,19 +305,19 @@ jobs:
     uses: ./.github/workflows/job_tensorflow_layer_tests.yml
     with:
       runner: 'aks-linux-4-cores-16gb'
-      shell: bash
       container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_22_04_x64 }}", "volumes": ["/mount:/mount"]}'
       affected-components: ${{ needs.smart_ci.outputs.affected_components }}
+      python-version: '3.11'
 
   Pytorch_Layer_Tests:
     name: Pytorch Layer Tests
-    needs: [ Docker, Build, Smart_CI, Openvino_tokenizers ]
+    needs: [ Docker, Build, Smart_CI ]
     uses: ./.github/workflows/job_pytorch_layer_tests.yml
     with:
       runner: 'aks-linux-4-cores-16gb'
-      shell: bash
       container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_22_04_x64 }}", "volumes": ["/mount:/mount"]}'
       affected-components: ${{ needs.smart_ci.outputs.affected_components }}
+      python-version: '3.11'
 
   CPU_Functional_Tests:
     name: CPU functional tests
diff --git a/.github/workflows/ubuntu_24.yml b/.github/workflows/ubuntu_24.yml
index 6409b417a0731b..295a4dd0e2c61a 100644
--- a/.github/workflows/ubuntu_24.yml
+++ b/.github/workflows/ubuntu_24.yml
@@ -133,6 +133,16 @@ jobs:
       affected-components: ${{ needs.smart_ci.outputs.affected_components }}
       python-version: '3.12'
 
+  Pytorch_Layer_Tests:
+    name: Pytorch Layer Tests
+    needs: [ Docker, Build, Smart_CI ]
+    uses: ./.github/workflows/job_pytorch_layer_tests.yml
+    with:
+      runner: 'aks-linux-4-cores-16gb'
+      container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_24_04_x64 }}", "volumes": ["/mount:/mount"]}'
+      affected-components: ${{ needs.smart_ci.outputs.affected_components }}
+      python-version: '3.12'
+
   Overall_Status:
     name: ci/gha_overall_status_ubuntu_24
     needs: [Smart_CI, Build, Debian_Packages, Samples, Python_Unit_Tests]
diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml
index 39cf2161525513..122fcc3c1c5021 100644
--- a/.github/workflows/windows_vs2019_release.yml
+++ b/.github/workflows/windows_vs2019_release.yml
@@ -404,17 +404,17 @@ jobs:
     uses: ./.github/workflows/job_tensorflow_layer_tests.yml
     with:
       runner: 'aks-win-8-cores-16gb'
-      shell: pwsh
       affected-components: ${{ needs.smart_ci.outputs.affected_components }}
+      python-version: '3.11'
 
   Pytorch_Layer_Tests:
     name: Pytorch Layer Tests
-    needs: [ Build, Smart_CI, Openvino_tokenizers ]
+    needs: [ Build, Smart_CI ]
     uses: ./.github/workflows/job_pytorch_layer_tests.yml
     with:
       runner: 'aks-win-8-cores-16gb'
-      shell: pwsh
       affected-components: ${{ needs.smart_ci.outputs.affected_components }}
+      python-version: '3.11'
 
   CXX_Unit_Tests:
     name: C++ unit tests
diff --git a/tests/layer_tests/pytorch_tests/test_bitwise_ops.py b/tests/layer_tests/pytorch_tests/test_bitwise_ops.py
index 1cf458500bcc71..e55a86f279de21 100644
--- a/tests/layer_tests/pytorch_tests/test_bitwise_ops.py
+++ b/tests/layer_tests/pytorch_tests/test_bitwise_ops.py
@@ -4,6 +4,8 @@
 import numpy as np
 import pytest
 import torch
+from packaging import version
+
 from pytorch_layer_test_class import PytorchLayerTest, skip_if_export
 
 
@@ -69,10 +71,12 @@ def forward_not_out(self, tensor_a, out):
     )
     @pytest.mark.parametrize("out", [False, skip_if_export(True)])
     def test_bitwise_mixed_dtypes(
-        self, op_type, out, lhs_dtype, rhs_dtype, lhs_shape, rhs_shape, ie_device, precision, ir_version
+            self, op_type, out, lhs_dtype, rhs_dtype, lhs_shape, rhs_shape, ie_device, precision, ir_version
     ):
         if ie_device == "GPU" and (lhs_dtype != "bool" or rhs_dtype != "bool"):
             pytest.xfail(reason="bitwise ops are not supported on GPU")
+        if out and version.parse(np.__version__) >= version.parse("2.0.0"):
+            pytest.xfail(reason="CVS-154082: incorrect handling out type")
         self._test(
             *self.create_model(op_type, out),
             ie_device,
diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch
index b82e0c76409057..0d5ac61903b104 100644
--- a/tests/requirements_pytorch
+++ b/tests/requirements_pytorch
@@ -1,10 +1,14 @@
+# test ovc with NumPy 2.x on Ubuntu 24 with default Python 3.12
+# test against NumPy 1.x with older Python versions
 # optimum still requires numpy<2.0.0
-numpy==1.26.4
+numpy==1.26.4; python_version < "3.12"
+numpy==2.1.1; python_version >= "3.12"
 torch==2.4.1; platform_system != "Darwin" or platform_machine != "x86_64"
-torch==2.2.0; platform_system == "Darwin" and platform_machine == "x86_64"
+torch==2.2.2; platform_system == "Darwin" and platform_machine == "x86_64"
 --extra-index-url https://download.pytorch.org/whl/cpu
 
-torchvision==0.19.1
+torchvision==0.19.1; platform_system != "Darwin" or platform_machine != "x86_64"
+torchvision==0.17.2; platform_system == "Darwin" and platform_machine == "x86_64"
 # transformers 4.45.1 is available
 # but optimum still requires <4.45.0
 transformers==4.44.2
@@ -13,22 +17,22 @@ pytest-html==4.1.1
 pytest-xdist[psutil]==3.6.1
 defusedxml==0.7.1
 
-auto-gptq==0.7.1; platform_system == "Linux" and platform_machine == "x86_64"
+auto-gptq==0.7.1; platform_system == "Linux" and platform_machine == "x86_64" and python_version < "3.12"
 av==13.0.0
-basicsr==1.4.2
+basicsr==1.4.2; python_version < "3.12"
 datasets==3.0.1
 easyocr==1.7.2
-facexlib==0.3.0
-librosa==0.10.2
-optimum==1.22.0
+facexlib==0.3.0; python_version < "3.12"
+librosa==0.10.2; python_version < "3.12"
+optimum==1.22.0; python_version < "3.12"
 packaging==24.1
 pandas==2.2.3
 protobuf==5.28.2
-pyctcdecode==0.5.0
+pyctcdecode==0.5.0; python_version < "3.12"
 sacremoses==0.1.1
 sentencepiece==0.2.0
 soundfile==0.12.1
-super-image==0.1.7
+super-image==0.1.7; python_version < "3.12"
 timm==1.0.8
 torchaudio==2.4.1
 wheel==0.44.0
@@ -36,7 +40,7 @@ PyYAML==6.0.2
 kornia==0.7.3
 
 # use latest released version once it's available
-git+https://github.com/huggingface/optimum-intel.git@main
+git+https://github.com/huggingface/optimum-intel.git@main; python_version < "3.12"
 # set 'export HF_HUB_ENABLE_HF_TRANSFER=1' to benefits from hf_transfer
 hf_transfer==0.1.8
 
diff --git a/tests/requirements_tensorflow b/tests/requirements_tensorflow
index 9d025397ed1fbd..6042eb8a46a9c3 100644
--- a/tests/requirements_tensorflow
+++ b/tests/requirements_tensorflow
@@ -4,7 +4,8 @@ pytest==7.0.1
 pytest-xdist[psutil]==3.6.1
 pytest-html==4.1.1
 transformers==4.45.1
-tensorflow==2.17.0
+tensorflow==2.17.0; platform_system != "Darwin" or platform_machine != "x86_64"
+tensorflow==2.16.2; platform_system == "Darwin" and platform_machine == "x86_64"
 # tensorflow-text is not available for both Windows and ARM platforms
 tensorflow-text==2.17.0; platform_system == "Linux" and platform_machine == "x86_64"
 tensorflow-hub==0.16.1

From 1b892bfb00fcbccec8db96f66a86e3b1e01f6262 Mon Sep 17 00:00:00 2001
From: Pavel Durandin <pavel.durandin@intel.com>
Date: Thu, 3 Oct 2024 13:43:02 +0400
Subject: [PATCH 2/4] [GPU] Fix double jit constants (#26893)

### Details:
 - Fix the double definition of a JIT constant (`DECOMPRESSION_SCALE_POST_OP`)
---
 .../fully_connected/fully_connected_kernel_bf_tiled.cpp      | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp
index 24641f3eb6aab0..c4115d74f54a92 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp
@@ -534,6 +534,7 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para
     size_t tile_k_ofm_packed = tile_k_ofm;
     size_t quantize_grp_size = get_dynamic_quantize_group_size(params);
 
+    bool add_decompress_scale_post_op = false;
     WeightsType weights_dt = params.weights.GetDType();
     if (weights_dt == WeightsType::UINT4 || weights_dt == WeightsType::INT4) {
         tile_k_ofm_packed /= 2;
@@ -542,7 +543,7 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para
         const size_t scale_group_size = params.weights.IFM().v / params.decompression_scale.Feature().v;
         // Do not use SCALE_POST_OP for SLM kernel, since it demonstrates worse performance
         if (scale_group_size % simd == 0 && !dispatchData.use_slm)
-            jit.AddConstant(MakeJitConstant("DECOMPRESSION_SCALE_POST_OP", 1));
+            add_decompress_scale_post_op = true;
     }
     if (params.weights.GetLayout() == WeightsLayout::os_is_yx_osv32_isv2) {
         jit.AddConstant(MakeJitConstant("W_IDX", "fi * TILE_K + kii"));
@@ -619,6 +620,8 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para
         jit.AddConstant(MakeJitConstant("DQ_TYPE", "char"));
         jit.AddConstant(MakeJitConstant("QUANTIZE_GROUP_SIZE", quantize_grp_size));
     } else {
+        if (add_decompress_scale_post_op)
+            jit.AddConstant(MakeJitConstant("DECOMPRESSION_SCALE_POST_OP", 1));
         jit.AddConstant(MakeJitConstant("DYNAMIC_QUANTIZE", 0));
         jit.AddConstant(MakeJitConstant("QUANTIZE_GROUP_SIZE", min_quantize_grp_size));
     }

From 4254c13364ac212e47590184d82c6746bd36aae5 Mon Sep 17 00:00:00 2001
From: "Anastasiya(Asya) Pronina" <anastasiya.pronina@intel.com>
Date: Thu, 3 Oct 2024 12:27:03 +0200
Subject: [PATCH 3/4] NPUW: Bring back unpack and partitioning unit tests for
 NPUW (#26885)

This PR adds unit tests for:

1. unpack routines within NPUW
2. main online partitioning functionality (smaller unit tests for Graph,
Group, Repeated, etc. will be added separately)

Brings back https://github.com/openvinotoolkit/openvino/pull/25780

Local run:

```
[----------] Global test environment tear-down
[==========] 334 tests from 6 test suites ran. (3379 ms total)
[  PASSED  ] 334 tests.
```

---------

Co-authored-by: Alexey Smirnov <alexey.smirnov@intel.com>
Co-authored-by: Dmitry Matveev <dmitry.matveev@intel.com>
---
 .../npuw/partitioning/online/snapshot.hpp     |  16 +-
 src/plugins/intel_npu/tests/CMakeLists.txt    |   1 +
 .../intel_npu/tests/unit/CMakeLists.txt       |  46 ++
 .../tests/unit/npuw/online_partitioning.cpp   | 692 ++++++++++++++++++
 .../intel_npu/tests/unit/npuw/unpack.cpp      | 103 +++
 .../intel_npu/tests/unit/npuw/unpack.hpp      | 628 ++++++++++++++++
 6 files changed, 1478 insertions(+), 8 deletions(-)
 create mode 100644 src/plugins/intel_npu/tests/unit/CMakeLists.txt
 create mode 100644 src/plugins/intel_npu/tests/unit/npuw/online_partitioning.cpp
 create mode 100644 src/plugins/intel_npu/tests/unit/npuw/unpack.cpp
 create mode 100644 src/plugins/intel_npu/tests/unit/npuw/unpack.hpp

diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp
index 72a62781580cda..e7e5121b1240e7 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp
@@ -16,8 +16,6 @@ namespace ov {
 namespace npuw {
 namespace online {
 
-class Group;  // forward declaration
-
 namespace detail {
 // At partitioning level we exclude some "non-Ops" to not interfere with the passes.
 // We include some of them back to properly link everything at plugin level
@@ -33,6 +31,8 @@ class Snapshot : public std::enable_shared_from_this<Snapshot> {
           m_node_to_prod_cons(std::make_shared<detail::OVNodeMap>()),
           m_node_to_gr(std::make_shared<detail::OVNodeToGroupMap>()) {}
 
+    friend class Group;  // forward declaration
+
     // Simple passes
     void singleGroup();
 
@@ -49,27 +49,27 @@ class Snapshot : public std::enable_shared_from_this<Snapshot> {
     void repeatedBlocks();
     void earlyAvoids();
     void earlyRegroup();
-    void markInternalCompute();
-    void resetExcludedRep();
 
     // Utility
     std::shared_ptr<own::ade::Graph> getGraph() const;
-    size_t graphSize() const;
-    const detail::OVNodeSet& getNodeProducers(const detail::OVNodePtr& node) const;
-    const detail::OVNodeSet& getNodeConsumers(const detail::OVNodePtr& node) const;
     const detail::OVPortsMap& getPortsMap() const;
     const detail::OVNodeToGroupMapPtr& getNodeToGroupMap() const;
     const std::map<std::string, std::vector<std::set<std::string>>>& getMatches() const;
-    detail::GPtrSet getRepGroups(const std::shared_ptr<Group>& group) const;
     void repeat(detail::Pass&& pass);
     void setCtx(const PassContext& ctx);
+    size_t graphSize() const;
 
 private:
+    detail::GPtrSet getRepGroups(const std::shared_ptr<Group>& group) const;
+    const detail::OVNodeSet& getNodeProducers(const detail::OVNodePtr& node) const;
+    const detail::OVNodeSet& getNodeConsumers(const detail::OVNodePtr& node) const;
     void identifyUniques();
     void mergeUniques();
     void mergeTriangles();
     void cleanUpUniques();
     void afterUniques();
+    void markInternalCompute();
+    void resetExcludedRep();
     bool cleanUpUniquesImpl(const detail::GPtrSet& gset);
     std::shared_ptr<Repeated> tryGrowRepeatingGroups(const detail::GPtrSet& repeating_groups);
     std::shared_ptr<Repeated> tryMergeTriangles(const detail::GPtrSet& repeating_groups);
diff --git a/src/plugins/intel_npu/tests/CMakeLists.txt b/src/plugins/intel_npu/tests/CMakeLists.txt
index 4c41f008eb7f81..0f5bd7a6b093b2 100644
--- a/src/plugins/intel_npu/tests/CMakeLists.txt
+++ b/src/plugins/intel_npu/tests/CMakeLists.txt
@@ -8,3 +8,4 @@ if (MSVC)
     ov_add_compiler_flags(/wd5105)
 endif()
 add_subdirectory(functional)
+add_subdirectory(unit)
diff --git a/src/plugins/intel_npu/tests/unit/CMakeLists.txt b/src/plugins/intel_npu/tests/unit/CMakeLists.txt
new file mode 100644
index 00000000000000..861a0ff6a47076
--- /dev/null
+++ b/src/plugins/intel_npu/tests/unit/CMakeLists.txt
@@ -0,0 +1,46 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set(TARGET_NAME "ov_npu_unit_tests")
+
+set(MANDATORY_UNIT_TESTS_LIBS
+        "openvino::commonTestUtils"
+        "openvino::gmock"
+        "openvino::gtest"
+        "openvino::gtest_main"
+        "openvino::runtime"
+        "openvino::npu_al"
+        "openvino::npu_logger_utils"
+)
+
+ov_add_test_target(
+        NAME ${TARGET_NAME}
+        ROOT ${CMAKE_CURRENT_SOURCE_DIR}
+        ADDITIONAL_SOURCE_DIRS
+            ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/npuw/
+        DEPENDENCIES
+            openvino::runtime
+        INCLUDES
+            ${CMAKE_CURRENT_SOURCE_DIR}
+            ${CMAKE_CURRENT_SOURCE_DIR}/npuw
+            ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/npuw
+            ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/utils/include
+            ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/include
+            ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/al/include
+        LINK_LIBRARIES
+            ${MANDATORY_UNIT_TESTS_LIBS}
+        LABELS
+            NPUW
+)
+
+if(ENABLE_AVX2)
+    ov_avx2_optimization_flags(avx2_flags)
+    target_compile_options(${TARGET_NAME} PRIVATE "${avx2_flags}")
+endif()
+
+install(TARGETS ${TARGET_NAME}
+        RUNTIME DESTINATION tests
+        COMPONENT tests
+        EXCLUDE_FROM_ALL
+)
diff --git a/src/plugins/intel_npu/tests/unit/npuw/online_partitioning.cpp b/src/plugins/intel_npu/tests/unit/npuw/online_partitioning.cpp
new file mode 100644
index 00000000000000..af1fc5de8e92c7
--- /dev/null
+++ b/src/plugins/intel_npu/tests/unit/npuw/online_partitioning.cpp
@@ -0,0 +1,692 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <iostream>
+
+#include "partitioning/online/compiler.hpp"
+#include "partitioning/online/snapshot.hpp"
+#include "partitioning/online/group.hpp"
+
+#include "intel_npu/al/config/config.hpp"
+#include "intel_npu/al/config/npuw.hpp"
+
+#include "openvino/openvino.hpp"
+#include "openvino/op/ops.hpp"
+#include "openvino/op/util/op_types.hpp"
+// Structural equality of two ensembles. NOTE: sorts the groups and layer lists of BOTH arguments in place.
+bool isEqualEns(ov::npuw::Ensemble& ens1, ov::npuw::Ensemble& ens2);
+bool isEqualEns(ov::npuw::Ensemble& ens1, ov::npuw::Ensemble& ens2) {
+    if (ens1.groups.size() != ens2.groups.size()) {
+        return false;
+    }
+    // Sort the layer lists inside every group so the comparisons below do not depend on layer order.
+    for (auto& g : ens1.groups) {
+        std::sort(g.input_layers.begin(), g.input_layers.end());
+        std::sort(g.output_layers.begin(), g.output_layers.end());
+        std::sort(g.all_layers.begin(), g.all_layers.end());
+    }
+
+    for (auto& g : ens2.groups) {
+        std::sort(g.input_layers.begin(), g.input_layers.end());
+        std::sort(g.output_layers.begin(), g.output_layers.end());
+        std::sort(g.all_layers.begin(), g.all_layers.end());
+    }
+    // Bring the groups of both ensembles into the same order: by the first entry of the sorted all_layers.
+    std::sort(ens1.groups.begin(), ens1.groups.end(), [](const ov::npuw::Group& g1,
+                                                         const ov::npuw::Group& g2){
+                                                                return g1.all_layers.front() < g2.all_layers.front();
+                                                        });
+    // NOTE(review): front() assumes every group holds at least one layer - confirm for new test models.
+    std::sort(ens2.groups.begin(), ens2.groups.end(), [](const ov::npuw::Group& g1,
+                                                         const ov::npuw::Group& g2){
+                                                                return g1.all_layers.front() < g2.all_layers.front();
+                                                        });
+
+    for (size_t i = 0; i < ens1.groups.size(); ++i) {
+        const auto& g1 = ens1.groups.at(i);
+        const auto& g2 = ens2.groups.at(i);
+
+        if (g1.avoid_list != g2.avoid_list ||
+            g1.input_layers != g2.input_layers ||
+            g1.output_layers != g2.output_layers ||
+            g1.all_layers != g2.all_layers) {
+            return false;
+        }
+
+        // repeated_id values are random and don't affect the structure, so only check that both are set or both are empty
+        if ((g1.repeated_id.empty() && !g2.repeated_id.empty()) ||
+            (!g1.repeated_id.empty() && g2.repeated_id.empty())) {
+            return false;
+        }
+    }
+
+    if (ens1.repeated.size() != ens2.repeated.size()) {
+        return false;
+    }
+    // Canonical form of the repeated blocks: the match lists without their map keys, sorted at both levels.
+    auto get_sorted_rep = [](const std::map<std::string, ov::npuw::RepeatedBlock>& rep) {
+        std::vector<std::vector<std::set<std::string>>> sorted_rep;
+
+        std::transform(rep.begin(), rep.end(), std::back_inserter(sorted_rep), [](const auto& v) {
+            return v.second.matches;
+        });
+
+        for (auto& g : sorted_rep) {
+            std::sort(g.begin(), g.end(),
+                    [](const auto& a, const auto& b) {return *a.begin() < *b.begin();});
+        }
+
+        std::sort(sorted_rep.begin(), sorted_rep.end(),
+                    [](const auto& a, const auto& b) {return *a.front().begin() < *b.front().begin();});
+
+        return sorted_rep;
+    };
+
+
+    if (get_sorted_rep(ens1.repeated) != get_sorted_rep(ens2.repeated)) {
+        return false;
+    }
+
+    return true;
+}
+
+class ModelGenerator {
+public:
+    ModelGenerator() = default;
+
+    std::shared_ptr<ov::Model> get_model_without_repeated_blocks() {
+        std::shared_ptr<ov::op::v0::Parameter> input = std::make_shared<ov::op::v0::Parameter>(ov::element::i32, ov::Shape{1, 1, 40});
+        m_nodes.push_back(input);
+        set_name(input);
+
+        std::shared_ptr<ov::Node> res = get_block(input);
+
+        auto result = std::make_shared<ov::op::v0::Result>(res);
+        m_nodes.push_back(result);
+        set_name(result);
+
+        ov::ParameterVector params = {input};
+        ov::ResultVector results = {result};
+
+        return std::make_shared<ov::Model>(results, params);
+    }
+
+    std::shared_ptr<ov::Model> get_model_with_repeated_blocks() {
+        // Generate head
+        std::shared_ptr<ov::op::v0::Parameter> input = std::make_shared<ov::op::v0::Parameter>(ov::element::i32, ov::Shape{1, 1, 40});
+        m_nodes.push_back(input);
+        set_name(input);
+
+        std::vector<std::shared_ptr<ov::Node>> head(7, nullptr);
+        head[0] = std::make_shared<ov::op::v1::Add>(input, input);
+        head[1] = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector<int>{2});
+        head[2] = std::make_shared<ov::op::v1::Divide>(head[0], head[1], true);
+        head[3] = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int>{1, 1, 4, 10});
+        head[4] = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{3}, std::vector<int>{1, 1, 40});
+        head[5] = std::make_shared<ov::op::v1::Reshape>(head[2], head[3], false);
+        head[6] = std::make_shared<ov::op::v1::Reshape>(head[5], head[4], false);
+
+        for (const auto& h : head) {
+            m_nodes.push_back(h);
+            set_name(h);
+        }
+
+        // Generate repeated blocks
+        std::shared_ptr<ov::Node> output = get_block(head[6]);
+        std::vector<std::shared_ptr<ov::Node>> outputs;
+        outputs.push_back(output);
+
+        for (size_t i = 0; i < 9; ++i) {
+            output = get_block(output);
+            outputs.push_back(output);
+        }
+
+        // Generate tail
+        std::vector<std::shared_ptr<ov::Node>> tail(6, nullptr);
+        tail[0] = std::make_shared<ov::op::v0::Concat>(outputs, -1);
+        tail[1] = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{3}, std::vector<int>{1, 20, 20});
+        tail[2] = std::make_shared<ov::op::v1::Reshape>(tail[0], tail[1], false);
+        tail[3] = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{1, 1, 1});
+        tail[4] = std::make_shared<ov::op::v1::Multiply>(tail[2], tail[3]);
+        tail[5] = std::make_shared<ov::op::v1::Add>(tail[4], tail[4]);
+
+        for (const auto& t : tail) {
+            m_nodes.push_back(t);
+            set_name(t);
+        }
+
+        // Create model
+        auto result = std::make_shared<ov::op::v0::Result>(tail[5]);
+        m_nodes.push_back(result);
+        set_name(result);
+
+        ov::ParameterVector params = {input};
+        ov::ResultVector results = {result};
+
+        return std::make_shared<ov::Model>(results, params);
+    }
+
+    std::shared_ptr<ov::Node> get_block(const std::shared_ptr<ov::Node>& input) {
+        // Parameters
+        // input
+
+        // Constants
+        std::vector<std::shared_ptr<ov::Node>> model_c(18, nullptr);
+        model_c[0] = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int>{0, 2, 1, 3});
+        model_c[1] = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{1}, std::vector<int>{1});
+        model_c[2] = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{1}, std::vector<int>{0});
+        model_c[3] = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{1}, std::vector<int>{2});
+        model_c[4] = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{1}, std::vector<int>{0});
+        model_c[5] = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int>{1, 1, 1, 1});
+        model_c[6] = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{1}, std::vector<int>{1});
+        model_c[7] = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{1}, std::vector<int>{0});
+        model_c[8] = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int>{1, 1, 1, 1});
+        model_c[9] = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int>{1, 1, 1, 2});
+        model_c[10] = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int>{1, 1, 1, 1});
+        model_c[11] = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int>{1, 1, 1, 2});
+        model_c[12] = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{1, 1, 1, 1});
+        model_c[13] = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{1, 1, 1, 1});
+        model_c[14] = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{1, 1, 1, 1});
+        model_c[15] = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{40, 40});
+        model_c[16] = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int>{1, 1, 4, 10});
+        model_c[17] = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{3}, std::vector<int>{1, 1, 40});
+
+        for (const auto& c : model_c) {
+            m_nodes.push_back(c);
+            set_name(c);
+        }
+
+        // Converts
+        std::vector<std::shared_ptr<ov::Node>> convert(3, nullptr);
+        convert[0] = std::make_shared<ov::op::v0::Convert>(model_c[15], ov::element::f16);
+        convert[1] = std::make_shared<ov::op::v0::Convert>(convert[0], ov::element::i32);
+        convert[2] = std::make_shared<ov::op::v0::Convert>(model_c[12], ov::element::i32);
+
+        for (const auto& c : convert) {
+            m_nodes.push_back(c);
+            set_name(c);
+        }
+
+        // Ops
+        std::vector<std::shared_ptr<ov::Node>> op(16, nullptr);
+        op[0] = std::make_shared<ov::op::v0::MatMul>(input, convert[1], false, true);
+        op[1] = std::make_shared<ov::op::v1::Reshape>(op[0], model_c[16], false);
+        op[2] = std::make_shared<ov::op::v1::Transpose>(op[1], model_c[0]);
+        op[3] = std::make_shared<ov::op::v0::ShapeOf>(op[2]);
+        op[4] = std::make_shared<ov::op::v1::Gather>(op[3], model_c[1], model_c[2]);
+        op[5] = std::make_shared<ov::op::v1::Divide>(op[4], model_c[3], true);
+        op[6] = std::make_shared<ov::op::v0::Floor>(op[5]);
+        op[7] = std::make_shared<ov::op::v3::ScatterUpdate>(model_c[5], model_c[6], op[6], model_c[7]);
+        op[8] = std::make_shared<ov::op::v1::StridedSlice>(op[2],
+                                                            model_c[8],
+                                                            op[7],
+                                                            model_c[9],
+                                                            std::vector<int64_t>{1, 1, 1, 1},
+                                                            std::vector<int64_t>{1, 1, 1, 1});
+        op[9] = std::make_shared<ov::op::v1::StridedSlice>(op[2],
+                                                            op[7],
+                                                            model_c[10],
+                                                            model_c[11],
+                                                            std::vector<int64_t>{1, 1, 1, 1},
+                                                            std::vector<int64_t>{1, 1, 1, 1});
+        op[10] = std::make_shared<ov::op::v1::Multiply>(op[9], convert[2]);
+        op[11] = std::make_shared<ov::op::v0::Concat>(std::vector<std::shared_ptr<ov::Node>>{op[10], op[8]}, -1);
+        op[12] = std::make_shared<ov::op::v1::Multiply>(model_c[13], op[11]);
+        op[13] = std::make_shared<ov::op::v1::Multiply>(model_c[14], op[2]);
+        op[14] = std::make_shared<ov::op::v1::Add>(op[13], op[12]);
+        op[15] = std::make_shared<ov::op::v1::Reshape>(op[14], model_c[17], false);
+
+        for (const auto& o : op) {
+            m_nodes.push_back(o);
+            set_name(o);
+        }
+
+        return op[15];
+    }
+
+private:
+    void set_name(const std::shared_ptr<ov::Node>& node) {
+        node->set_friendly_name("node_" + std::to_string(m_name_idx++));
+    }
+
+    std::vector<std::shared_ptr<ov::Node>> m_nodes;
+    size_t m_name_idx = 0;  // next suffix for the "node_<N>" friendly names; must start from a defined value
+};
+
+TEST(OnlinePartitioningTest, Partitioning_IsTheSame_SmallModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_without_repeated_blocks();
+
+    auto opt_desc = std::make_shared<::intel_npu::OptionsDesc>();
+    auto cfg = ::intel_npu::Config(opt_desc);
+    ::intel_npu::registerNPUWOptions(*opt_desc);
+    std::map<std::string, std::string> cfg_map = {{ "NPUW_ONLINE_KEEP_BLOCK_SIZE", "9" }};
+    cfg.update(cfg_map);
+
+    auto ens = ov::npuw::online::buildPartitioning(model, cfg);
+
+    for (size_t i = 0; i < 100; ++i) {
+        auto ens_again = ov::npuw::online::buildPartitioning(model, cfg);
+        EXPECT_TRUE(isEqualEns(ens, ens_again));
+    }
+}
+
+TEST(OnlinePartitioningTest, Partitioning_IsTheSame_RepeatedModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_with_repeated_blocks();
+
+    auto opt_desc = std::make_shared<::intel_npu::OptionsDesc>();
+    auto cfg = ::intel_npu::Config(opt_desc);
+    ::intel_npu::registerNPUWOptions(*opt_desc);
+    std::map<std::string, std::string> cfg_map = {{ "NPUW_ONLINE_KEEP_BLOCK_SIZE", "9" }};
+    cfg.update(cfg_map);
+
+    auto ens = ov::npuw::online::buildPartitioning(model, cfg);
+
+    for (size_t i = 0; i < 100; ++i) {
+        auto ens_again = ov::npuw::online::buildPartitioning(model, cfg);
+        EXPECT_TRUE(isEqualEns(ens, ens_again));
+    }
+}
+
+TEST(OnlinePartitioningTest, Partitioning_SingleGroup_SmallModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_without_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->singleGroup();
+    EXPECT_EQ(snap->graphSize(), 1);
+}
+
+TEST(OnlinePartitioningTest, Partitioning_SingleGroup_RepeatedModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_with_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->singleGroup();
+    EXPECT_EQ(snap->graphSize(), 1);
+}
+
+TEST(OnlinePartitioningTest, Partitioning_buildGraph_SmallModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_without_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+    auto g = snap->getGraph();
+    for (const auto& nh : g->sorted()) {
+        ov::npuw::online::Group::GPtr group = g->meta(nh).get<ov::npuw::online::Group::GPtr>();
+        EXPECT_EQ(group->size(), 1);
+    }
+    EXPECT_EQ(snap->getNodeToGroupMap()->size(), snap->graphSize());
+}
+
+TEST(OnlinePartitioningTest, Partitioning_buildGraph_RepeatedModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_with_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+    auto g = snap->getGraph();
+    for (const auto& nh : g->sorted()) {
+        ov::npuw::online::Group::GPtr group = g->meta(nh).get<ov::npuw::online::Group::GPtr>();
+        EXPECT_EQ(group->size(), 1);
+    }
+    EXPECT_EQ(snap->getNodeToGroupMap()->size(), snap->graphSize());
+}
+
+TEST(OnlinePartitioningTest, Partitioning_earlyAvoids_SmallModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_without_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    ov::npuw::online::PassContext ctx;
+    ctx.avoids = {{ov::npuw::online::PatternType::OP, "Gather", "mydevice"}, {ov::npuw::online::PatternType::OP, "MatMul", "mydevice"}};
+    snap->setCtx(ctx);
+    snap->buildGraph();
+    snap->earlyAvoids();
+    auto g = snap->getGraph();
+    size_t count = 0;
+    for (const auto& nh : g->sorted()) {
+        ov::npuw::online::Group::GPtr group = g->meta(nh).get<ov::npuw::online::Group::GPtr>();
+        EXPECT_EQ(group->size(), 1);
+        if (group->avoidedTargets().size() == 1 && *(group->avoidedTargets().begin()) == "mydevice") {
+            ++count;
+        }
+    }
+    EXPECT_EQ(count, 2);
+}
+
+TEST(OnlinePartitioningTest, Partitioning_earlyAvoids_RepeatedModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_with_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    ov::npuw::online::PassContext ctx;
+    ctx.avoids = {{ov::npuw::online::PatternType::OP, "Gather", "mydevice"}, {ov::npuw::online::PatternType::OP, "MatMul", "mydevice"}};
+    snap->setCtx(ctx);
+    snap->buildGraph();
+    snap->earlyAvoids();
+    auto g = snap->getGraph();
+    size_t count = 0;
+    for (const auto& nh : g->sorted()) {
+        ov::npuw::online::Group::GPtr group = g->meta(nh).get<ov::npuw::online::Group::GPtr>();
+        EXPECT_EQ(group->size(), 1);
+        if (group->avoidedTargets().size() == 1 && *(group->avoidedTargets().begin()) == "mydevice") {
+            ++count;
+        }
+    }
+    EXPECT_EQ(count, 20);
+}
+
+TEST(OnlinePartitioningTest, Partitioning_collectLHF_SmallModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_without_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+
+    std::vector<std::size_t> sizes = {10, 10};  // expected graphSize() after each invocation of the pass below
+    size_t iter = 0;
+
+    snap->repeat([&]{
+        snap->collectLHF();
+        ASSERT_LT(iter, sizes.size());  // fatal on purpose: never index past the end of sizes
+        EXPECT_EQ(snap->graphSize(), sizes[iter++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_collectLHF_RepeatedModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_with_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+
+    std::vector<std::size_t> sizes = {82, 82};  // expected graphSize() after each invocation of the pass below
+    size_t iter = 0;
+
+    snap->repeat([&]{
+        snap->collectLHF();
+        ASSERT_LT(iter, sizes.size());  // fatal on purpose: never index past the end of sizes
+        EXPECT_EQ(snap->graphSize(), sizes[iter++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_fuseRemnants_SmallModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_without_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+
+    std::vector<std::size_t> sizes = {10, 10};  // expected graphSize() after each invocation of the pass below
+    size_t iter = 0;
+
+    snap->repeat([&]{
+        snap->fuseRemnants();
+        ASSERT_LT(iter, sizes.size());  // fatal on purpose: never index past the end of sizes
+        EXPECT_EQ(snap->graphSize(), sizes[iter++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_fuseRemnants_RepeatedModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_with_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+
+    std::vector<std::size_t> sizes = {75, 38, 19, 10};  // expected graphSize() after each invocation of the pass below
+    size_t iter = 0;
+
+    snap->repeat([&]{
+        snap->fuseRemnants();
+        ASSERT_LT(iter, sizes.size());  // fatal on purpose: never index past the end of sizes
+        EXPECT_EQ(snap->graphSize(), sizes[iter++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_fuseRemnantsExtended_SmallModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_without_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+
+    std::vector<std::size_t> sizes = {10, 10};  // expected graphSize() after each invocation of the pass below
+    size_t iter = 0;
+
+    snap->repeat([&]{
+        snap->fuseRemnantsExtended();
+        ASSERT_LT(iter, sizes.size());  // fatal on purpose: never index past the end of sizes
+        EXPECT_EQ(snap->graphSize(), sizes[iter++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_fuseRemnantsExtended_RepeatedModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_with_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+
+    std::vector<std::size_t> sizes = {10, 10};  // expected graphSize() after each invocation of the pass below
+    size_t iter = 0;
+
+    snap->repeat([&]{
+        snap->fuseRemnantsExtended();
+        ASSERT_LT(iter, sizes.size());  // fatal on purpose: never index past the end of sizes
+        EXPECT_EQ(snap->graphSize(), sizes[iter++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_fuseInputs_SmallModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_without_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+
+    std::vector<std::size_t> sizes = {15, 14, 14};  // expected graphSize() after each invocation of the pass below
+    size_t iter = 0;
+
+    snap->repeat([&]{
+        snap->fuseInputs();
+        ASSERT_LT(iter, sizes.size());  // fatal on purpose: never index past the end of sizes
+        EXPECT_EQ(snap->graphSize(), sizes[iter++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_fuseInputs_RepeatedModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_with_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+
+    std::vector<std::size_t> sizes = {148, 138, 138};  // expected graphSize() after each invocation of the pass below
+    size_t iter = 0;
+
+    snap->repeat([&]{
+        snap->fuseInputs();
+        ASSERT_LT(iter, sizes.size());  // fatal on purpose: never index past the end of sizes
+        EXPECT_EQ(snap->graphSize(), sizes[iter++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_Compiler_Just_SmallModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_without_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+
+    std::vector<std::size_t> sizes_lhf = {10, 10};  // expected graphSize() after each collectLHF() invocation
+    size_t iter_lhf = 0;
+
+    std::vector<std::size_t> sizes_fr = {10, 10};  // expected graphSize() after each fuseRemnants() invocation
+    size_t iter_fr = 0;
+
+    snap->repeat([&] {
+        snap->collectLHF();
+        ASSERT_LT(iter_lhf, sizes_lhf.size());  // fatal on purpose: never index past the end of sizes_lhf
+        EXPECT_EQ(snap->graphSize(), sizes_lhf[iter_lhf++]);
+    });
+    snap->repeat([&] {
+        snap->fuseRemnants();
+        ASSERT_LT(iter_fr, sizes_fr.size());  // fatal on purpose: never index past the end of sizes_fr
+        EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_Compiler_Just_RepeatedModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_with_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+
+    std::vector<std::size_t> sizes_lhf = {82, 82};  // expected graphSize() after each collectLHF() invocation
+    size_t iter_lhf = 0;
+
+    std::vector<std::size_t> sizes_fr = {41, 21, 11, 10, 10};  // expected graphSize() after each fuseRemnants() invocation
+    size_t iter_fr = 0;
+
+    snap->repeat([&] {
+        snap->collectLHF();
+        ASSERT_LT(iter_lhf, sizes_lhf.size());  // fatal on purpose: never index past the end of sizes_lhf
+        EXPECT_EQ(snap->graphSize(), sizes_lhf[iter_lhf++]);
+    });
+    snap->repeat([&] {
+        snap->fuseRemnants();
+        ASSERT_LT(iter_fr, sizes_fr.size());  // fatal on purpose: never index past the end of sizes_fr
+        EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_Compiler_RepeatedBlocks_SmallModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_without_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+
+
+    std::vector<std::size_t> sizes_fr = {10, 10};  // expected graphSize() after each fuseRemnantsExtended() invocation
+    size_t iter_fr = 0;
+
+    snap->earlyAvoids();
+    snap->earlyRegroup();
+    snap->repeatedBlocks();
+    EXPECT_EQ(snap->graphSize(), 17);
+
+    auto matches = snap->getMatches();
+    EXPECT_EQ(matches.size(), 0);
+
+    snap->repeat([&] {
+        snap->fuseRemnantsExtended();
+        ASSERT_LT(iter_fr, sizes_fr.size());  // fatal on purpose: never index past the end of sizes_fr
+        EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_Compiler_RepeatedBlocks_RepeatedModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_with_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+    snap->buildGraph();
+
+
+    std::vector<std::size_t> sizes_fr = {12, 12};  // expected graphSize() after each fuseRemnantsExtended() invocation
+    size_t iter_fr = 0;
+
+    snap->earlyAvoids();
+    snap->earlyRegroup();
+    snap->repeatedBlocks();
+    EXPECT_EQ(snap->graphSize(), 18);
+
+    auto matches = snap->getMatches();
+    EXPECT_EQ(matches.size(), 1);
+
+    for (const auto& m : matches) {
+        EXPECT_EQ(m.second.size(), 17);
+        for (const auto& layers : m.second) {
+            EXPECT_EQ(layers.size(), 10);
+        }
+    }
+
+    snap->repeat([&] {
+        snap->fuseRemnantsExtended();
+        ASSERT_LT(iter_fr, sizes_fr.size());  // fatal on purpose: never index past the end of sizes_fr
+        EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_Compiler_Compute_SmallModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_without_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+
+    std::vector<std::size_t> sizes_fr = {10, 10};  // expected graphSize() after each fuseRemnantsExtended() invocation
+    size_t iter_fr = 0;
+
+    ov::npuw::online::PassContext ctx;
+    ctx.isolates = {{ov::npuw::online::PatternType::OP, "Transpose", "test_compute"}, {ov::npuw::online::PatternType::OP, "ScatterUpdate", "test_compute"}};
+    ctx.nofolds = {"test_compute"};
+    snap->setCtx(ctx);
+
+    snap->buildGraph();
+    snap->earlyAvoids();
+    snap->earlyRegroup();
+    snap->repeatedBlocks();
+    EXPECT_EQ(snap->graphSize(), 17);
+
+    auto matches = snap->getMatches();
+    EXPECT_EQ(matches.size(), 0);
+
+    snap->repeat([&] {
+        snap->fuseRemnantsExtended();
+        ASSERT_LT(iter_fr, sizes_fr.size());  // fatal on purpose: never index past the end of sizes_fr
+        EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]);
+    });
+}
+
+TEST(OnlinePartitioningTest, Partitioning_Compiler_Compute_RepeatedModel) {
+    ModelGenerator mg;
+    auto model = mg.get_model_with_repeated_blocks();
+
+    auto snap = std::make_shared<ov::npuw::online::Snapshot>(model);
+
+    std::vector<std::size_t> sizes_fr = {10, 10};  // expected graphSize() after each fuseRemnantsExtended() invocation
+    size_t iter_fr = 0;
+
+    ov::npuw::online::PassContext ctx;
+    ctx.isolates = {{ov::npuw::online::PatternType::OP, "Gather", "test_compute"},
+                    {ov::npuw::online::PatternType::OP, "ScatterUpdate", "test_compute"},
+                    {ov::npuw::online::PatternType::OP, "ShapeOf", "test_compute"},
+                    {ov::npuw::online::PatternType::OP, "Divide", "test_compute"},
+                    {ov::npuw::online::PatternType::OP, "Floor", "test_compute"}};
+    ctx.nofolds = {"test_compute"};
+    snap->setCtx(ctx);
+
+    snap->buildGraph();
+    snap->earlyAvoids();
+    snap->earlyRegroup();
+    snap->repeatedBlocks();
+    EXPECT_EQ(snap->graphSize(), 29);
+
+    // FIXME: create a config in which there will be repeated blocks
+    auto matches = snap->getMatches();
+    EXPECT_EQ(matches.size(), 0);
+
+    snap->repeat([&] {
+        snap->fuseRemnantsExtended();
+        ASSERT_LT(iter_fr, sizes_fr.size());  // fatal on purpose: never index past the end of sizes_fr
+        EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]);
+    });
+}
diff --git a/src/plugins/intel_npu/tests/unit/npuw/unpack.cpp b/src/plugins/intel_npu/tests/unit/npuw/unpack.cpp
new file mode 100644
index 00000000000000..1049832f6ead7c
--- /dev/null
+++ b/src/plugins/intel_npu/tests/unit/npuw/unpack.cpp
@@ -0,0 +1,103 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifdef HAVE_AVX2
+#include "unpack.hpp"
+
+namespace {
+
+const auto TestCases = ::testing::Combine(
+        ::testing::ValuesIn({ov::element::Type_t::i4}),
+        ::testing::ValuesIn({ov::element::Type_t::i8, ov::element::Type_t::f16}),
+        ::testing::ValuesIn({ov::element::Type_t::undefined}), // not used in this test
+        ::testing::ValuesIn({ov::element::Type_t::undefined}), // not used in this test
+        ::testing::ValuesIn({3lu, 0lu}),
+        ::details::ShapesIn({Tensors{input={1, 1, 1, 32};},
+                             Tensors{input={1,1,1, 128};},
+                             Tensors{input={1,1,1, 390};},
+                             Tensors{input={1,1,1, 82};}}),
+        ::testing::ValuesIn({true, false}),
+        ::testing::ValuesIn({true, false})
+);
+
+INSTANTIATE_TEST_SUITE_P(UnpackTests, UnpackTests,
+                         TestCases,
+                         UnpackTests::getTestCaseName);
+
+const auto TestCasesScale = ::testing::Combine(
+        ::testing::ValuesIn({ov::element::Type_t::i4}), // TODO: add i8 as input for test
+        ::testing::ValuesIn({ov::element::Type_t::f16, ov::element::Type_t::f32}),
+        ::testing::ValuesIn({ov::element::Type_t::f16, ov::element::Type_t::f32}),
+        ::testing::ValuesIn({ov::element::Type_t::undefined}), // not used in this test
+        ::testing::ValuesIn({3lu, 0lu}),
+        ::details::ShapesIn({Tensors{input={1,32, 128};     scale = {1, 32, 1};},
+                             Tensors{input={32, 128};       scale = {32, 1};},
+                             Tensors{input={64, 160};       scale = {64, 1};},
+                             Tensors{input={1024, 4};       scale = {64, 1};},
+                             Tensors{input={1, 1, 1024, 4}; scale = {1, 1, 64, 1};}}),
+        ::testing::ValuesIn({true, false}),
+        ::testing::ValuesIn({true, false})
+);
+
+INSTANTIATE_TEST_SUITE_P(UnpackWithScaleTests, UnpackWithScaleTests,
+                         TestCasesScale,
+                         UnpackWithScaleTests::getTestCaseName);
+
+
+const auto TestCasesScaleAndZeroPoints = ::testing::Combine(
+        ::testing::ValuesIn({ov::element::Type_t::u4}),
+        ::testing::ValuesIn({ov::element::Type_t::f16}),
+        ::testing::ValuesIn({ov::element::Type_t::f16}),
+        ::testing::ValuesIn({ov::element::Type_t::u4}),
+        ::testing::ValuesIn({3lu, 0lu}),
+        ::details::ShapesIn({Tensors{input={1,32, 128};     scale = {1, 32, 1};},
+                             Tensors{input={1,64, 160};     scale = {1, 64, 1};},
+                             Tensors{input={1,1024, 4};     scale = {1, 64, 1};},
+                             Tensors{input={1,1, 1024, 4};  scale = {1, 1, 64, 1};},
+                             Tensors{input={64, 1};         scale = {64, 1};}}),
+        ::testing::ValuesIn({true, false}),
+        ::testing::ValuesIn({true, false})
+);
+
+INSTANTIATE_TEST_SUITE_P(UnpackTestsWithScaleAndZeroPoint, UnpackTestsWithScaleAndZeroPoint,
+                         TestCasesScaleAndZeroPoints,
+                         UnpackTestsWithScaleAndZeroPoint::getTestCaseName);
+
+const auto TestCasesScaleAndZeroPoints2 = ::testing::Combine(
+        ::testing::ValuesIn({ov::element::Type_t::u4}),
+        ::testing::ValuesIn({ov::element::Type_t::f16}),
+        ::testing::ValuesIn({ov::element::Type_t::f32}),
+        ::testing::ValuesIn({ov::element::Type_t::f32}),
+        ::testing::ValuesIn({3lu, 0lu}),
+        ::details::ShapesIn({Tensors{input={32, 32, 64};    scale = {32, 1, 64};},
+                             Tensors{input={64, 64, 128};   scale = {64, 1, 128};},
+                             Tensors{input={64, 32, 32};    scale = {64, 1, 32};}}),
+        ::testing::ValuesIn({true, false}),
+        ::testing::ValuesIn({true, false})
+);
+
+INSTANTIATE_TEST_SUITE_P(UnpackTestsWithScaleAndZeroPointTest2, UnpackTestsWithScaleAndZeroPointTest2,
+                         TestCasesScaleAndZeroPoints2,
+                         UnpackTestsWithScaleAndZeroPointTest2::getTestCaseName);
+
+const auto TestCasesScaleAndZeroPoints3 = ::testing::Combine(
+        ::testing::ValuesIn({ov::element::Type_t::u4}),
+        ::testing::ValuesIn({ov::element::Type_t::f16}),
+        ::testing::ValuesIn({ov::element::Type_t::f16}),
+        ::testing::ValuesIn({ov::element::Type_t::u4}),
+        ::testing::ValuesIn({3lu, 0lu}),
+        ::details::ShapesIn({Tensors{input={1, 32, 128};     scale = {1, 32, 1};   zerop = {1, 32, 1};},
+                             Tensors{input={16, 64, 64};     scale = {16, 64, 1};  zerop = {16, 64, 1};},
+                             Tensors{input={1, 1024, 4};     scale = {1, 64, 1};   zerop = {1, 32, 1};}}),
+        ::testing::ValuesIn({true, false}),
+        ::testing::ValuesIn({true, false})
+);
+
+INSTANTIATE_TEST_SUITE_P(UnpackTestsWithScaleAndZeroPointTest3, UnpackTestsWithScaleAndZeroPointTest3,
+                         TestCasesScaleAndZeroPoints3,
+                         UnpackTestsWithScaleAndZeroPointTest3::getTestCaseName);
+
+} // anonymous namespace
+
+#endif // HAVE_AVX2
diff --git a/src/plugins/intel_npu/tests/unit/npuw/unpack.hpp b/src/plugins/intel_npu/tests/unit/npuw/unpack.hpp
new file mode 100644
index 00000000000000..da5bb4e4720f3e
--- /dev/null
+++ b/src/plugins/intel_npu/tests/unit/npuw/unpack.hpp
@@ -0,0 +1,628 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gmock/gmock-matchers.h>
+#include <gtest/gtest.h>
+#include <immintrin.h>
+#include <cstdint>
+#include <iomanip>
+#include <iostream>
+#include <array>
+
+#include "openvino/runtime/make_tensor.hpp"
+
+#include "util.hpp"
+
+namespace {
+
+#define ASSERT_NO_THROW_WITH_MESSAGE(code) do{ /* run `code`; on any exception fail the test with its message */ \
+    try {\
+     code;\
+     }catch (const std::exception &ex ) {\
+         FAIL()<<ex.what();\
+     }catch (...) {\
+         FAIL() << "Unknown exception";\
+     }\
+}while(0)
+
+#define ASSERT_NO_THROW_IF(condition, code) do { /* `code` must succeed when `condition` holds and must throw otherwise */ \
+if (condition) {ASSERT_NO_THROW_WITH_MESSAGE(code);} else {ASSERT_ANY_THROW(code);} \
+}while(0)
+
+namespace details {
+
+inline int8_t hi4(int8_t x) {
+    return (x & 0xF0) >> 4;  // upper nibble (bits 7..4) moved down to bits 3..0
+}
+
+inline int8_t lo4(int8_t x) {
+    return x & 0x0F;  // lower nibble (bits 3..0), upper bits cleared
+}
+
+inline uint8_t hi4(uint8_t x) {
+    return x >> 4;  // upper nibble (bits 7..4) as a value in 0..15
+}
+
+inline uint8_t lo4(uint8_t x) {
+    return x & 0x0F;  // lower nibble (bits 3..0) as a value in 0..15
+}
+
+inline int8_t upc(int8_t h) {
+    return (h & 0x08) ? static_cast<int8_t>(h | ~0x07) : h;  // when bit 3 is set, fill bits 7..3 with ones (4-bit sign extension)
+}
+
+typedef unsigned short ushort;
+typedef unsigned int uint;
+
+float half_to_float(const ushort x) {
+    // Converts one fp16 bit pattern to float via the F16C intrinsic; only the lowest vector lane is used.
+    __m128i halfVector = _mm_cvtsi32_si128(x);
+    __m128 floatVector = _mm_cvtph_ps(halfVector);
+    return _mm_cvtss_f32(floatVector);
+}
+
+ushort float_to_half(const float x) {
+    __m128 floatVector = _mm_set_ss(x);  // x in the lowest lane, remaining lanes zeroed
+    __m128i halfVector = _mm_cvtps_ph(floatVector, _MM_FROUND_TO_NEAREST_INT);  // fp32 -> fp16, round to nearest
+    return _mm_extract_epi16(halfVector, 0);  // lowest 16 bits carry the converted value
+}
+
+inline uint16_t int2hfloat(int8_t x)
+{
+    float inputFl32 = static_cast<float>(x);
+    float* inputFl32_ptr = &inputFl32;
+    unsigned int* fltInt32Ptr = reinterpret_cast<unsigned int*>(inputFl32_ptr);  // NOTE(review): type-punned read; std::memcpy would avoid strict-aliasing concerns
+    unsigned int fltInt32 = *fltInt32Ptr;  // raw fp32 bit pattern of x
+    unsigned short fltInt16;
+    // The fp16 pattern is assembled field by field: sign, re-biased exponent, top mantissa bits.
+    fltInt16 = (fltInt32 >> 31) << 5;  // sign bit, parked above the 5 exponent bits
+    unsigned short tmp = (fltInt32 >> 23) & 0xff;  // 8-bit fp32 exponent field
+    tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27);  // re-bias by 0x70; mask is 0x1f when tmp > 0x70 (assuming arithmetic >> on negative int), else 0
+    fltInt16 = (fltInt16 | tmp) << 10;  // sign and exponent moved into bits 15..10
+    fltInt16 |= (fltInt32 >> 13) & 0x3ff;  // upper 10 of the 23 fp32 mantissa bits; the rest are dropped
+
+    return fltInt16;
+}
+
+
+void unpack(const int8_t* in, int8_t* out, int size) {
+    const int nBytes = size / 2;  // two 4-bit elements per packed input byte
+    for (int b = 0; b < nBytes; ++b) {
+        out[2 * b] = upc(lo4(in[b]));      // low nibble first, sign-extended
+        out[2 * b + 1] = upc(hi4(in[b]));  // then the high nibble
+    }
+}
+
+void unpack_i4f16(const int8_t* in, int8_t* out, int size) {
+    // `out` is a byte buffer that receives one 16-bit half-float pattern per unpacked nibble.
+    uint16_t* halves = reinterpret_cast<uint16_t*>(out);
+    const int nBytes = size / 2;  // two 4-bit elements per packed input byte
+    for (int b = 0; b < nBytes; ++b) {
+        halves[2 * b] = int2hfloat(upc(lo4(in[b])));      // low nibble first
+        halves[2 * b + 1] = int2hfloat(upc(hi4(in[b])));  // then the high nibble
+    }
+}
+
+/* u4 order: the low nibble of each byte comes first; nibbles are not sign-extended. */
+void unpack_u4f32(const int8_t* in, float* out, int size) {
+    const int nBytes = size / 2;  // two 4-bit elements per packed input byte
+    for (int b = 0; b < nBytes; ++b) {
+        out[2 * b] = static_cast<float>(lo4(in[b]));
+        out[2 * b + 1] = static_cast<float>(hi4(in[b]));
+    }
+}
+
+template<typename T>  // T: byte container exposing size(), data() and operator[]
+::testing::AssertionResult fp16ArraysMatch(const T &actual,    // bytes produced by the code under test
+                                           const T &expected,  // reference bytes, compared as 16-bit words
+                                           const T &i4Input,   // packed 4-bit input, used only for diagnostics
+                                           bool int4 = 1 /*i4 or u4*/){
+    for (size_t i = 0; i < expected.size() / 2; ++i) {
+        // Both nibbles of the packed byte indexed by i / 2; they only feed the failure message below.
+        int int8Input[] ={
+                details::lo4(i4Input[i / 2]),
+                details::hi4(i4Input[i / 2])
+        };
+
+        if (int4) {
+            int8Input[0] = details::upc(int8Input[0]);  // sign-extend each nibble in place (previously crossed over)
+            int8Input[1] = details::upc(int8Input[1]);
+        };
+
+        auto fp16ref = int{*((uint16_t*)expected.data() + i)};
+        auto fp16out = int{*((uint16_t*)actual.data() + i)};
+
+#define _P(x) std::dec << std::setw(5) << (x) << '(' << std::setw(4) << std::hex << (x) << ')'
+        if (fp16ref != fp16out) {
+            return ::testing::AssertionFailure() << std::dec << std::setw(4) << i << ", i4:"
+                                                 << std::setw(2) << int8Input[i % 2]
+                                                 << " | ref " << _P(fp16ref)
+                                                 << ", test "  << _P(fp16out) << "\n";
+        }
+#undef  _P
+
+    }
+
+    return ::testing::AssertionSuccess();
+}
+
+}  // namespace details
+
+using ShapesInitializer = std::function<void (std::vector<int>&, std::vector<int>&, std::vector<int>&)>;
+
+
+using UnpackTestsParams = std::tuple<
+        ov::element::Type_t,  // fromPrecision
+        ov::element::Type_t,  // toPrecision
+        ov::element::Type_t,  // scalePrecision
+        ov::element::Type_t,  // zeroPointPrecision
+        unsigned long,        // nPartitions
+        ShapesInitializer,    // input_shape , scale_shape, zerop initializer
+        bool,                 // use parallel_for
+        bool                  // strict partitioning
+        >;
+
+class UnpackTestsBase {
+protected:
+    ov::element::Type fromType;
+    ov::element::Type toType;
+    ov::element::Type scaleType;
+    ov::element::Type zeropType;
+    std::shared_ptr<ov::ITensor> from, to, scale, zerop;
+
+    std::vector<int8_t> input;
+    std::vector<int8_t> output;
+    std::vector<int8_t> ref_output;
+    std::vector<int8_t> scalesStorage;
+    std::vector<int8_t> zeropStorage;
+    float zeropValue;
+    ov::Shape input_shape;
+    ov::Shape scale_shape;
+    ov::Shape zerop_shape;
+
+    size_t nPartitions;
+    bool useParallelFor = false;
+    bool strictPartitions = false;
+
+    void make_zeropoints() {
+        if (zeropType == ov::element::undefined) {
+            return;
+        }
+        // Builds zeropStorage and the `zerop` tensor from a repeating table of zero-point values.
+        const std::vector<float> zeropValues = {15.0f, 12.0f, 0.0f, 31.0f};
+        const size_t nElements = shape_size(zerop_shape);
+
+        // Set zeropValue if there's only one element
+        if (nElements == 1) {
+            zeropValue = zeropValues.front();
+        }
+
+        // Determine the size of the storage based on the type and resize the storage vector
+        if (zeropType == ov::element::Type_t::u4) {
+            zeropStorage.resize((nElements + 1) / 2, 0); // Each u4 zeropoint is 4 bits, so two zeropoints fit in one byte
+        } else if (zeropType == ov::element::Type_t::f32) {
+            zeropStorage.resize(nElements * sizeof(float), 0);
+        } else {
+            ASSERT_TRUE(zeropType == ov::element::u4 || zeropType == ov::element::f32);
+        }
+
+        // Fill the storage with the appropriate values
+        if (zeropType == ov::element::Type_t::u4) {
+            for (size_t i = 0; i < nElements; ++i) {
+                uint8_t zeropValueU4 = static_cast<uint8_t>(zeropValues[i % zeropValues.size()]) & 0x0F;
+                size_t byteIndex = i / 2;
+                if (i % 2 == 0) {
+                    zeropStorage[byteIndex] = zeropValueU4;  // even element: (re)initialise the byte with the low nibble
+                } else {
+                    zeropStorage[byteIndex] |= (zeropValueU4 << 4);  // odd element: add the high nibble, keep the low one
+                }
+            }
+        } else if (zeropType == ov::element::Type_t::f32) {
+            float* ptrWork = reinterpret_cast<float*>(zeropStorage.data());
+            for (size_t i = 0; i < nElements; ++i) {
+                ptrWork[i] = zeropValues[i % zeropValues.size()];
+            }
+        }
+
+        // Create the tensor
+        zerop = ov::make_tensor(zeropType, zerop_shape, zeropStorage.data());
+    }
+
+    void make_scales() {
+        // Fixtures without a scale type leave `scale` unset.
+        if (scaleType == ov::element::undefined) {
+            return;
+        }
+        // Only fp16 and fp32 scales are handled here.
+        ASSERT_TRUE(scaleType == ov::element::f16 || scaleType == ov::element::f32);
+
+        // Scale factors cycle through this table, one entry per scale element.
+        const std::array<float, 4> coeffCycle = {0.1f, 0.5f, 1.f, 2.f};
+        const size_t scaleCount = shape_size(scale_shape);
+
+        std::vector<float> factors(scaleCount);
+        for (size_t idx = 0; idx < scaleCount; ++idx) {
+            factors[idx] = coeffCycle[idx % coeffCycle.size()];
+        }
+
+        // Raw byte storage sized for scaleCount elements of scaleType.
+        scalesStorage.resize(scaleType.bitwidth() * scaleCount / 8);
+
+        if (scaleType == ov::element::f16) {
+            // fp16 scales are written as 16-bit patterns.
+            auto* halfOut = reinterpret_cast<uint16_t*>(scalesStorage.data());
+            for (size_t idx = 0; idx < scaleCount; ++idx) {
+                halfOut[idx] = details::float_to_half(factors[idx]);
+            }
+        } else {
+            // The assertion above leaves fp32 as the only other type: copy the factors as they are.
+            auto* floatOut = reinterpret_cast<float*>(scalesStorage.data());
+            for (size_t idx = 0; idx < scaleCount; ++idx) {
+                floatOut[idx] = factors[idx];
+            }
+        }
+        // The tensor is created over scalesStorage's buffer.
+        scale = ov::make_tensor(scaleType, scale_shape, scalesStorage.data());
+    }
+
+    void make_input() {
+        // Sizes the input/output byte buffers for input_shape and fills the input with a repeating byte pattern.
+        size_t nElements = shape_size(input_shape);
+
+        ASSERT_EQ((fromType.bitwidth() * nElements) % 8, 0) << "Input len has to be byte boundary aligned, but was "
+                                                            << fromType.bitwidth() * nElements << " bits";
+        ASSERT_EQ((toType.bitwidth() * nElements) % 8, 0) << "Output len has to be byte boundary aligned";
+
+        const size_t nInputBytes  = fromType.bitwidth() * nElements  / 8;
+        const size_t nOutputBytes = toType.bitwidth() * nElements  / 8;
+
+        input.resize(nInputBytes);
+        ref_output.resize(nOutputBytes);
+        output.resize(nOutputBytes);
+        std::fill(ref_output.begin(), ref_output.end(), 0);  // both result buffers start zeroed
+        std::fill(output.begin(), output.end(), 0);
+
+        std::array<int8_t, 32> input_local = {  // 32-byte pattern, repeated to fill `input`
+                0x0A, 0x0B, 0x1C, 0x1D, 0x2E, 0x2F, 0x35, 0x36,
+                0x4A, 0x4B, 0x5A, 0x5B, 0x6A, 0x6B, 0x7A, 0x7B,
+                0x0C, 0x0D, 0x1C, 0x1D, 0x2C, 0x2D, 0x3C, 0x3D,
+                0x4C, 0x4D, 0x5C, 0x5D, 0x6C, 0x6D, 0x7C, 0x7D,
+        };
+
+        for (size_t idx = 0, k = 0; k < nInputBytes; k++, idx = (idx + 1) % input_local.size()) {
+            input[k] = input_local[idx];  // idx wraps around the pattern
+        }
+
+        from = ov::make_tensor(fromType, input_shape, input.data());  // tensor created over `input`
+        to = ov::make_tensor(toType, input_shape, output.data());     // tensor created over `output`
+    }
+public:
+    void SetUp(const UnpackTestsParams & getParam) {
+        ShapesInitializer fillShapes;
+        // Unpack the parameter tuple straight into the fixture members.
+        std::tie(fromType, toType, scaleType, zeropType, nPartitions, fillShapes, useParallelFor, strictPartitions) = getParam;
+
+        // Dimension lists filled in by the shape initializer of this parameter set.
+        std::vector<int> inputDims, scaleDims, zeropDims;
+        fillShapes(inputDims, scaleDims, zeropDims);
+
+        input_shape = ov::Shape{inputDims.begin(), inputDims.end()};
+        scale_shape = ov::Shape{scaleDims.begin(), scaleDims.end()};
+        // An initializer that leaves the zero-point dims empty yields a one-element shape.
+        zerop_shape = zeropDims.empty() ? ov::Shape({1})
+                                        : ov::Shape{zeropDims.begin(), zeropDims.end()};
+
+        // Buffers and tensors first, the reference output last.
+        make_input();
+        make_scales();
+        make_zeropoints();
+
+        make_ref_output();
+    }
+    std::string ToString() const {  // builds the test-case name from the fixture state
+        std::ostringstream result;
+        result << (isNegative() ? "NEGATIVE_" : "")  // prefix for cases expected to throw
+               <<"[";
+
+        for (size_t i = 0; i != input_shape.size(); i++) {
+            result << input_shape[i] << ((i + 1 == input_shape.size()) ? "" : "x");  // dims joined by 'x'
+        }
+        result <<"]"
+               << "_p" << nPartitions
+               << (strictPartitions ? "_SP" : "")
+               << (useParallelFor ? "_parallel" : "_serial")
+               << "_from_" << fromType
+               << "_to_" << toType;
+        if (scaleType != ov::element::Type_t::undefined)  // scale suffix only when a scale type is set
+            result << "_scale_" << scaleType;
+        if (zeropType != ov::element::Type_t::undefined)  // zero-point suffix only when a zero-point type is set
+            result << "_zerop_" << zeropType;
+
+        return result.str();
+    }
+
+    /**
+     * Tells whether this parameter combination is a negative case; review overrides carefully so valid cases stay positive.
+     * @return false in this base fixture; derived fixtures override it with their own checks.
+     */
+    virtual bool isNegative() const {
+        return false;
+    }
+
+    virtual void make_ref_output() {
+        // Plain i4 reference: only i8 and f16 destinations are handled; other types leave ref_output untouched.
+        const int count = static_cast<int>(shape_size(input_shape));
+        if (toType == ov::element::i8) {
+            details::unpack(input.data(), ref_output.data(), count);
+            return;
+        }
+        if (toType == ov::element::f16) {
+            details::unpack_i4f16(input.data(), ref_output.data(), count);
+        }
+    }
+};
+
+template <class T>  // T: fixture base that provides SetUp(const UnpackTestsParams&) and ToString()
+class UnpackTestsTmpl :
+        public ::testing::Test,
+        public T,
+        public ::testing::WithParamInterface<UnpackTestsParams> {
+protected:
+    // GoogleTest hook: forwards the current parameter tuple to the fixture base.
+    void SetUp() override {
+        T::SetUp(GetParam());
+    }
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<UnpackTestsParams>& obj) {
+        T _bt;  // throwaway fixture, set up only to build the name
+        _bt.SetUp(obj.param);
+        return _bt.ToString();
+    }
+};
+
+using UnpackTests = UnpackTestsTmpl<UnpackTestsBase>;
+class UnpackTestsRef : public UnpackTests {};
+
+TEST_P(UnpackTests, i4) {  // unpack without scale or zero point; must not throw
+    ASSERT_NO_THROW_WITH_MESSAGE(ov::npuw::util::unpack(from, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions}));
+    ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input));  // output vs. reference, compared as 16-bit words
+}
+
+class UnpackWithScaleTestsBase : public UnpackTestsBase {
+protected:
+    bool isNegative() const override {
+        if (scale_shape.size() != 3 && scale_shape.size() != 2) return true;  // scale must be 2D or 3D
+        if (input_shape.back() % 64) return true;  // innermost input dim must be a multiple of 64
+        if ((from->get_size() / scale->get_size()) % 64) return true;  // elements per scale must be a multiple of 64
+        if (toType != ov::element::f16) return true;  // only f16 output counts as a positive case
+
+        return false;
+    }
+
+    void make_ref_output() override {
+        if (isNegative()) return;  // no reference is built for negative cases
+
+        size_t nElements = from->get_size();
+        // Number of consecutive output elements that share one scale value.
+        const size_t nOutputElementsPerScale = ref_output.size() / (toType.bitwidth() / 8) / scale->get_size();
+
+        details::unpack_i4f16(input.data(), ref_output.data(), static_cast<int>(nElements));
+
+        // lets apply per channel scale
+        uint16_t * pRef = reinterpret_cast<uint16_t*>(ref_output.data());
+        uint16_t * pScale_f16 = reinterpret_cast<uint16_t*>(scale->data());  // the two scale views advance together;
+        float * pScale_f32 = reinterpret_cast<float*>(scale->data());        // only the one matching scaleType is read
+
+        for (size_t i = 0; i < scale->get_size(); i++) {
+            for (size_t sc = 0; sc != nOutputElementsPerScale; sc++) {
+                float ref_scaled = details::half_to_float(pRef[0]);
+                if (scaleType == ov::element::f32) {
+                    ref_scaled *= pScale_f32[0];
+                } else if (scaleType == ov::element::f16) {
+                    ref_scaled *= details::half_to_float(pScale_f16[0]);
+                }
+                *pRef = details::float_to_half(ref_scaled);  // scaled value written back in place as fp16
+                pRef++;
+            }
+            pScale_f32++;
+            pScale_f16++;
+        }
+    }
+
+};
+
+using UnpackWithScaleTests = UnpackTestsTmpl<UnpackWithScaleTestsBase>;
+
+
+TEST_P(UnpackWithScaleTests, i4_scale) {  // unpack with a scale tensor
+    ASSERT_NO_THROW_IF(!isNegative(),
+                       ov::npuw::util::unpack(from, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions}));
+    if (!isNegative()) {  // results are compared only for positive cases
+        ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input));
+    }
+}
+
+
+class UnpackTestsWithScaleAndZeroPointBase : public UnpackTestsBase {
+protected:
+    bool isNegative() const override {
+        if (scale_shape.size() != 3 && scale_shape.size() != 2) return true;  // scale must be 2D or 3D
+        if (input_shape.back() % 64) return true;  // innermost input dim must be a multiple of 64
+
+        return false;
+    }
+
+    void make_ref_output() override {
+        if (isNegative()) return;  // no reference is built for negative cases
+
+        size_t nElements = from->get_size();
+        // Number of consecutive output elements that share one scale value.
+        const size_t nOutputElementsPerScale = ref_output.size() / (toType.bitwidth() / 8) / scale->get_size();
+
+        std::vector<float> floatRef(nElements);
+        details::unpack_u4f32(input.data(), floatRef.data(), static_cast<int>(nElements));
+
+
+        // lets apply per channel scale
+        uint16_t * pRef = reinterpret_cast<uint16_t*>(ref_output.data());
+        float * pFloatRef = reinterpret_cast<float*>(floatRef.data());
+        const uint16_t * pScale_f16 = reinterpret_cast<uint16_t*>(scale->data());  // the two scale views advance together;
+        const float * pScale_f32 = reinterpret_cast<float*>(scale->data());        // only the one matching scaleType is read
+
+        for (size_t i = 0; i < scale->get_size(); i++) {
+            for (size_t sc = 0; sc != nOutputElementsPerScale; sc++) {
+                // applying zeropoint
+                float ref_scaled = *pFloatRef - zeropValue;  // one shared zero point for every element
+
+                if (scaleType == ov::element::f32) {
+                    ref_scaled *= pScale_f32[0];
+                } else if (scaleType == ov::element::f16) {
+                    ref_scaled *= details::half_to_float(pScale_f16[0]);
+                }
+                *pRef = details::float_to_half(ref_scaled);  // reference stored as fp16 bit patterns
+
+                pFloatRef++;
+                pRef++;
+            }
+            pScale_f32++;
+            pScale_f16++;
+        }
+    }
+};
+
+using UnpackTestsWithScaleAndZeroPoint = UnpackTestsTmpl<UnpackTestsWithScaleAndZeroPointBase>;
+
+TEST_P(UnpackTestsWithScaleAndZeroPoint, u4) {  // unpack with zero-point and scale tensors
+    ASSERT_NO_THROW_IF(!isNegative(),
+                       ov::npuw::util::unpack(from, zerop, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions}));
+    if (!isNegative()) {  // results are compared only for positive cases
+        ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input, false));  // false: input nibbles are unsigned
+    }
+}
+
+class UnpackTestsWithScaleAndZeroPoint2 : public UnpackTestsWithScaleAndZeroPointBase {
+protected:
+    bool isNegative() const override {
+        // Rank is tested first so that back() is never called on an empty shape.
+        if (input_shape.size() != 3 || input_shape.back() % 64) return true;
+        if (scale_shape.size() != 3 || scale_shape.back() % 64) return true;
+        return false;
+    }
+
+    void make_ref_output() override {
+        if (isNegative()) return;  // no reference is built for negative cases
+
+        size_t nElements = from->get_size();
+        const auto from_shape = from->get_shape();
+
+        const size_t C = from_shape[from_shape.size() - 3];  // the three dims of the input, outermost first
+        const size_t H = from_shape[from_shape.size() - 2];
+        const size_t W = from_shape[from_shape.size() - 1];
+
+        std::vector<float> floatRef(nElements);
+        details::unpack_u4f32(input.data(), floatRef.data(), static_cast<int>(nElements));
+
+        uint16_t * pRef = reinterpret_cast<uint16_t*>(ref_output.data());
+        float * pFloatRef = reinterpret_cast<float*>(floatRef.data());
+        const uint16_t * pScale_f16 = reinterpret_cast<uint16_t*>(scale->data());  // only the view matching
+        const float * pScale_f32 = reinterpret_cast<float*>(scale->data());        // scaleType is read
+
+        for (size_t c = 0; c < C; ++c) {
+            for (size_t h = 0; h < H; ++h) {
+                for (size_t w = 0; w < W; ++w) {
+                    size_t input_index =  w + W * h + W * H * c;
+                    size_t scale_index = w + W * c;  // the scale does not depend on h
+                    float ref_scaled = pFloatRef[input_index] - zeropValue;  // one shared zero point
+                    if (scaleType == ov::element::f32) {
+                        ref_scaled *= pScale_f32[scale_index];
+                    } else if (scaleType == ov::element::f16) {
+                        ref_scaled *= details::half_to_float(pScale_f16[scale_index]);
+                    }
+                    pRef[w + W * h + c * W * H] = details::float_to_half(ref_scaled);
+                }
+            }
+        }
+    }
+};
+
+using UnpackTestsWithScaleAndZeroPointTest2 = UnpackTestsTmpl<UnpackTestsWithScaleAndZeroPoint2>;
+
+TEST_P(UnpackTestsWithScaleAndZeroPointTest2, u4) {  // unpack with zero-point and scale tensors
+    ASSERT_NO_THROW_IF(!isNegative(),
+                       ov::npuw::util::unpack(from, zerop, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions}));
+    if (!isNegative()) {  // results are compared only for positive cases
+        ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input, false));  // false: input nibbles are unsigned
+    }
+}
+
+class UnpackTestsWithScaleAndZeroPoint3 : public UnpackTestsWithScaleAndZeroPointBase {
+protected:
+    bool isNegative() const override {
+        if (scale_shape.size() != 3 || zerop_shape.size() != 3) return true;
+        // Rank is tested first so that input_shape[2] is never read out of bounds.
+        if (input_shape.size() != 3 || input_shape[2] % 64) return true;
+        return false;
+    }
+
+    void make_ref_output() override {
+        if (isNegative()) return;  // no reference is built for negative cases
+
+        size_t nElements = from->get_size();
+        // Number of consecutive output elements that share one scale value.
+        const size_t nOutputElementsPerScale = ref_output.size() / (toType.bitwidth() / 8) / scale->get_size();
+
+        std::vector<float> floatRef(nElements);
+        details::unpack_u4f32(input.data(), floatRef.data(), static_cast<int>(nElements));
+
+
+        // lets apply per channel scale
+        uint16_t * pRef = reinterpret_cast<uint16_t*>(ref_output.data());
+        const uint8_t* pZer = static_cast<uint8_t*>(zerop->data());  // packed u4 zero points, two per byte
+        float * pFloatRef = reinterpret_cast<float*>(floatRef.data());
+        const uint16_t * pScale_f16 = reinterpret_cast<uint16_t*>(scale->data());  // only the view matching
+        const float * pScale_f32 = reinterpret_cast<float*>(scale->data());        // scaleType is read
+
+        for (size_t i = 0; i < scale->get_size(); i++) {
+            float zeroPointValue = static_cast<float>((i % 2 == 0) ? details::lo4(pZer[i / 2]) : details::hi4(pZer[i / 2]));
+            for (size_t sc = 0; sc != nOutputElementsPerScale; sc++) {
+                // applying zeropoint
+                float ref_scaled = *pFloatRef - zeroPointValue;  // zero point i is paired with scale i
+
+                if (scaleType == ov::element::f32) {
+                    ref_scaled *= pScale_f32[0];
+                } else if (scaleType == ov::element::f16) {
+                    ref_scaled *= details::half_to_float(pScale_f16[0]);
+                }
+                *pRef = details::float_to_half(ref_scaled);
+
+                pFloatRef++;
+                pRef++;
+            }
+            pScale_f32++;
+            pScale_f16++;
+        }
+    }
+};
+
+using UnpackTestsWithScaleAndZeroPointTest3 = UnpackTestsTmpl<UnpackTestsWithScaleAndZeroPoint3>;
+
+TEST_P(UnpackTestsWithScaleAndZeroPointTest3, u4) {  // unpack with zero-point and scale tensors
+    ASSERT_NO_THROW_IF(!isNegative(),
+                       ov::npuw::util::unpack(from, zerop, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions}));
+    if (!isNegative()) {  // results are compared only for positive cases
+        ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input, false));  // false: input nibbles are unsigned
+    }
+}
+
+#define Tensors [](std::vector<int>& input, std::vector<int>&scale, std::vector<int>&zerop)  // lambda header; the braces written after each use become its body
+
+
+namespace details {
+::testing::internal::ParamGenerator<ShapesInitializer> ShapesIn(
+        const std::vector<ShapesInitializer>& container) {
+    return ::testing::ValuesIn(container);  // generator over a copy of the given shape initializers
+}
+
+}  // namespace details
+}  // anonymous namespace

From 2fc0faedfa69caf2af5b5cd27c2f3cf5ad2203bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hubert=20B=C5=82aszczyk?=
 <56601011+hub-bla@users.noreply.github.com>
Date: Thu, 3 Oct 2024 14:21:01 +0200
Subject: [PATCH 4/4] [TF FE]: Support complex tensors for ExpandDims operation
 (#26892)

### Details:
 - Support complex tensors for `ExpandDims` operation + tests

### Tickets:
 - [None](https://github.com/openvinotoolkit/openvino/issues/22950)
---
 .../tensorflow_common/src/op/expand_dims.cpp  | 30 ++++++++++-
 .../tensorflow_tests/test_tf_ExpandDims.py    | 52 +++++++++++++++++++
 2 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/src/frontends/tensorflow_common/src/op/expand_dims.cpp b/src/frontends/tensorflow_common/src/op/expand_dims.cpp
index b3b37ad38cc302..a40e5c9b1bc6df 100644
--- a/src/frontends/tensorflow_common/src/op/expand_dims.cpp
+++ b/src/frontends/tensorflow_common/src/op/expand_dims.cpp
@@ -3,7 +3,13 @@
 //
 
 #include "common_op_table.hpp"
+#include "helper_ops/complex_type_mark.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/less.hpp"
+#include "openvino/op/select.hpp"
+#include "openvino/op/subtract.hpp"
 #include "openvino/op/unsqueeze.hpp"
+#include "utils.hpp"
 
 using namespace std;
 using namespace ov::op;
@@ -14,9 +20,31 @@ namespace tensorflow {
 namespace op {
 
 OutputVector translate_expand_dims_op(const NodeContext& node) {
-    default_op_checks(node, 2, {"ExpandDims", "EXPAND_DIMS"});
+    default_op_checks(node, 2, {"ExpandDims", "EXPAND_DIMS"}, true);
     auto input = node.get_input(0);
     auto axis = node.get_input(1);
+    auto complex_type_mark = as_type_ptr<ComplexTypeMark>(input.get_node_shared_ptr());
+
+    if (complex_type_mark) {
+        element::Type complex_part_type = complex_type_mark->get_complex_part_type();
+        input = complex_type_mark->input_value(0);
+
+        auto const_zero = create_same_type_const_scalar<int32_t>(axis, 0);
+
+        auto is_axis_neg = make_shared<v1::Less>(axis, const_zero);
+
+        auto const_one = create_same_type_const_scalar<int32_t>(axis, 1);
+        auto axis_min_one = make_shared<v1::Subtract>(axis, const_one);
+
+        auto new_axis = make_shared<v1::Select>(is_axis_neg, axis_min_one, axis);
+
+        auto unsqueeze = make_shared<v0::Unsqueeze>(input, new_axis);
+
+        set_node_name(node.get_name(), unsqueeze);
+        auto complex_result = make_shared<ComplexTypeMark>(unsqueeze, complex_part_type);
+        return {complex_result};
+    }
+
     auto unsqueeze = make_shared<v0::Unsqueeze>(input, axis);
     set_node_name(node.get_name(), unsqueeze);
     return {unsqueeze};
diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ExpandDims.py b/tests/layer_tests/tensorflow_tests/test_tf_ExpandDims.py
index f0f9085d32ba2f..e982867c9ac08d 100644
--- a/tests/layer_tests/tensorflow_tests/test_tf_ExpandDims.py
+++ b/tests/layer_tests/tensorflow_tests/test_tf_ExpandDims.py
@@ -6,6 +6,7 @@
 import tensorflow as tf
 from common.tf_layer_test_class import CommonTFLayerTest
 
+rng = np.random.default_rng(62362)
 
 class TestExpandDims(CommonTFLayerTest):
     def _prepare_input(self, inputs_info):
@@ -40,3 +41,54 @@ def test_expand_dims_basic(self, params, ie_device, precision, ir_version, temp_
         self._test(*self.create_expand_dims_net(**params),
                    ie_device, precision, ir_version, temp_dir=temp_dir,
                    use_legacy_frontend=use_legacy_frontend)
+
+
+class TestExpandDimsComplex(CommonTFLayerTest):
+    def _prepare_input(self, inputs_info):
+        # generate integer-valued elements so that the input tensor may contain repeating elements
+        assert 'param_real:0' in inputs_info
+        assert 'param_imag:0' in inputs_info
+
+        input_shape = inputs_info['param_real:0']  # the same shape is used for the real and imaginary parts
+
+        inputs_data = {}
+        inputs_data['param_real:0'] = rng.integers(-10.0, 10.0, input_shape).astype(np.float32)
+        inputs_data['param_imag:0'] = rng.integers(-10.0, 10.0, input_shape).astype(np.float32)
+
+        return inputs_data
+
+    def create_expand_dims_complex_net(self, axis_dtype, input_shape, axis):
+        tf.compat.v1.reset_default_graph()
+        with tf.compat.v1.Session() as sess:
+            param_real = tf.compat.v1.placeholder(np.float32, input_shape, 'param_real')
+            param_imag = tf.compat.v1.placeholder(np.float32, input_shape, 'param_imag')
+            # combine the two float placeholders into one complex tensor
+            complex = tf.raw_ops.Complex(real=param_real, imag=param_imag)
+
+            axis = tf.constant(axis, dtype=axis_dtype)
+
+            result = tf.raw_ops.ExpandDims(input=complex, axis=axis)
+            # split the complex result back into real and imaginary parts
+            tf.raw_ops.Real(input=result)
+            tf.raw_ops.Imag(input=result)
+
+            tf.compat.v1.global_variables_initializer()
+            tf_net = sess.graph_def
+
+        return tf_net, None
+
+    test_basic = [
+        dict(input_shape=[], axis=0),  # scalar input
+        dict(input_shape=[2, 3], axis=1),
+        dict(input_shape=[2, 3, 4], axis=-1),  # negative axes
+        dict(input_shape=[2, 6, 5], axis=-2),
+    ]
+
+    @pytest.mark.parametrize("axis_dtype", [np.int32, np.int64])
+    @pytest.mark.parametrize("op_args", test_basic)
+    @pytest.mark.nightly
+    @pytest.mark.precommit
+    def test_expand_dims_basic_complex(self, axis_dtype, op_args, ie_device, precision, ir_version, temp_dir, use_legacy_frontend):
+        self._test(*self.create_expand_dims_complex_net(axis_dtype, **op_args),
+                   ie_device, precision, ir_version, temp_dir=temp_dir,
+                   use_legacy_frontend=use_legacy_frontend)