diff --git a/src/bindings/python/src/pyopenvino/core/compiled_model.cpp b/src/bindings/python/src/pyopenvino/core/compiled_model.cpp
index d8163ce3a91005..1a460e83eef00d 100644
--- a/src/bindings/python/src/pyopenvino/core/compiled_model.cpp
+++ b/src/bindings/python/src/pyopenvino/core/compiled_model.cpp
@@ -177,6 +177,16 @@ void regclass_CompiledModel(py::module m) {
         :rtype: openvino.runtime.Model
     )");
 
+    cls.def("release_memory",
+            &ov::CompiledModel::release_memory,
+            py::call_guard<py::gil_scoped_release>(),
+            R"(
+        Release intermediate memory.
+
+        This method forces the compiled model to release memory allocated for intermediate structures,
+        e.g. caches, tensors, and temporary buffers, when possible.
+    )");
+
     cls.def_property_readonly("inputs",
                               &ov::CompiledModel::inputs,
                               R"(
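For orientation, the new binding can be exercised from Python roughly as below. This is a minimal sketch, not part of the change: the model path, device name, f32 input type, and random input data are placeholders.

```python
import numpy as np
import openvino as ov

core = ov.Core()
# Placeholder model and device; any compiled model exposes release_memory().
compiled_model = core.compile_model("model.xml", "CPU")
request = compiled_model.create_infer_request()

# Assumes a static-shape f32 input for illustration.
data = np.random.rand(*compiled_model.inputs[0].get_shape()).astype(np.float32)
request.infer({0: data})

# Free caches and temporary buffers while the model is idle; the compiled
# model stays valid and the next inference re-allocates what it needs.
compiled_model.release_memory()
request.infer({0: data})
```
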
diff --git a/src/bindings/python/tests/test_runtime/test_async_infer_request.py b/src/bindings/python/tests/test_runtime/test_async_infer_request.py
index ea1bf1a9f2a4af..f715d7b7b1238e 100644
--- a/src/bindings/python/tests/test_runtime/test_async_infer_request.py
+++ b/src/bindings/python/tests/test_runtime/test_async_infer_request.py
@@ -19,46 +19,12 @@
     Tensor,
 )
 from tests import skip_need_mock_op
-from tests.utils.helpers import generate_image, get_relu_model
-
-
-def concat_model_with_data(device, ov_type, numpy_dtype):
-    core = Core()
-
-    input_shape = [5]
-
-    params = []
-    params += [ops.parameter(input_shape, ov_type)]
-    if ov_type == Type.bf16:
-        params += [ops.parameter(input_shape, ov_type)]
-    else:
-        params += [ops.parameter(input_shape, numpy_dtype)]
-
-    model = Model(ops.concat(params, 0), params)
-    compiled_model = core.compile_model(model, device)
-    request = compiled_model.create_infer_request()
-    tensor1 = Tensor(ov_type, input_shape)
-    tensor1.data[:] = np.array([6, 7, 8, 9, 0])
-    array1 = np.array([1, 2, 3, 4, 5], dtype=numpy_dtype)
-
-    return request, tensor1, array1
-
-
-def abs_model_with_data(device, ov_type, numpy_dtype):
-    input_shape = [1, 4]
-    param = ops.parameter(input_shape, ov_type)
-    model = Model(ops.abs(param), [param])
-    core = Core()
-    compiled_model = core.compile_model(model, device)
-
-    request = compiled_model.create_infer_request()
-
-    tensor1 = Tensor(ov_type, input_shape)
-    tensor1.data[:] = np.array([6, -7, -8, 9])
-
-    array1 = np.array([[-1, 2, 5, -3]]).astype(numpy_dtype)
-
-    return compiled_model, request, tensor1, array1
+from tests.utils.helpers import (
+    generate_image,
+    get_relu_model,
+    generate_concat_compiled_model_with_data,
+    generate_abs_compiled_model_with_data,
+)
 
 
 @pytest.mark.parametrize("share_inputs", [True, False])
@@ -360,7 +326,7 @@ def callback(callbacks_info):
 ])
 @pytest.mark.parametrize("share_inputs", [True, False])
 def test_async_mixed_values(device, ov_type, numpy_dtype, share_inputs):
-    request, tensor1, array1 = concat_model_with_data(device, ov_type, numpy_dtype)
+    request, tensor1, array1 = generate_concat_compiled_model_with_data(device=device, ov_type=ov_type, numpy_dtype=numpy_dtype)
 
     request.start_async([tensor1, array1], share_inputs=share_inputs)
     request.wait()
@@ -380,7 +346,7 @@ def test_async_mixed_values(device, ov_type, numpy_dtype, share_inputs):
 ])
 @pytest.mark.parametrize("share_inputs", [True, False])
 def test_async_single_input(device, ov_type, numpy_dtype, share_inputs):
-    _, request, tensor1, array1 = abs_model_with_data(device, ov_type, numpy_dtype)
+    _, request, tensor1, array1 = generate_abs_compiled_model_with_data(device, ov_type, numpy_dtype)
 
     request.start_async(array1, share_inputs=share_inputs)
     request.wait()
@@ -401,7 +367,7 @@ def __init__(self, array) -> None:
         def __array__(self):
             return np.array(self.data)
 
-    _, request, _, input_data = abs_model_with_data(device, Type.f32, np.single)
+    _, request, _, input_data = generate_abs_compiled_model_with_data(device, Type.f32, np.single)
     model_input_object = ArrayLikeObject(input_data.tolist())
     model_input_list = [ArrayLikeObject(input_data.tolist())]
     # Test single array-like object in InferRequest().start_async()
diff --git a/src/bindings/python/tests/test_runtime/test_compiled_model.py b/src/bindings/python/tests/test_runtime/test_compiled_model.py
index f7e84daf281485..d6909fa94711d3 100644
--- a/src/bindings/python/tests/test_runtime/test_compiled_model.py
+++ b/src/bindings/python/tests/test_runtime/test_compiled_model.py
@@ -2,7 +2,6 @@
 # Copyright (C) 2018-2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-import sys
 import pytest
 import numpy as np
 
@@ -10,6 +9,7 @@
     get_relu_model,
     generate_image,
     generate_model_and_image,
+    generate_concat_compiled_model,
     generate_relu_compiled_model,
     generate_relu_compiled_model_with_config,
     encrypt_base64,
@@ -18,6 +18,7 @@
 
 from openvino import Model, Shape, Core, Tensor, serialize
 from openvino.runtime import ConstOutput
+import openvino.runtime.opset13 as ops
 
 import openvino.properties as props
 
@@ -276,3 +277,15 @@ def test_compiled_model_from_buffer_in_memory(request, tmp_path, device):
         compiled = core.compile_model(model=xml, weights=weights, device_name=device)
 
     _ = compiled([np.random.normal(size=list(input.shape)).astype(dtype=input.get_element_type().to_dtype()) for input in compiled.inputs])
+
+
+def test_memory_release(device):
+    compiled_model = generate_concat_compiled_model(device)
+    request = compiled_model.create_infer_request()
+
+    input_tensor = Tensor(compiled_model.inputs[0].get_element_type(), compiled_model.inputs[0].get_shape())
+    request.infer({0: input_tensor, 1: input_tensor})
+
+    # Release memory and perform inference again
+    compiled_model.release_memory()
+    request.infer({0: input_tensor, 1: input_tensor})
diff --git a/src/bindings/python/tests/test_runtime/test_sync_infer_request.py b/src/bindings/python/tests/test_runtime/test_sync_infer_request.py
index de30955e6c7204..e97f8e6530cadc 100644
--- a/src/bindings/python/tests/test_runtime/test_sync_infer_request.py
+++ b/src/bindings/python/tests/test_runtime/test_sync_infer_request.py
@@ -25,17 +25,17 @@
 from openvino.runtime import ProfilingInfo
 from openvino.preprocess import PrePostProcessor
 
-from tests.utils.helpers import generate_image, get_relu_model
+from tests.utils.helpers import (
+    generate_image,
+    get_relu_model,
+    generate_concat_compiled_model_with_data,
+    generate_add_compiled_model,
+    generate_abs_compiled_model_with_data,
+)
 
 
 def create_simple_request_and_inputs(device):
-    input_shape = [2, 2]
-    param_a = ops.parameter(input_shape, np.float32)
-    param_b = ops.parameter(input_shape, np.float32)
-    model = Model(ops.add(param_a, param_b), [param_a, param_b])
-
-    core = Core()
-    compiled_model = core.compile_model(model, device)
+    compiled_model = generate_add_compiled_model(device, input_shape=[2, 2])
     request = compiled_model.create_infer_request()
 
     arr_1 = np.array([[1, 2], [3, 4]], dtype=np.float32)
@@ -44,45 +44,6 @@ def create_simple_request_and_inputs(device):
     return request, arr_1, arr_2
 
 
-def concat_model_with_data(device, ov_type, numpy_dtype):
-    core = Core()
-
-    input_shape = [5]
-
-    params = []
-    params += [ops.parameter(input_shape, ov_type)]
-    if ov_type == Type.bf16:
-        params += [ops.parameter(input_shape, ov_type)]
-    else:
-        params += [ops.parameter(input_shape, numpy_dtype)]
-
-    model = Model(ops.concat(params, 0), params)
-    compiled_model = core.compile_model(model, device)
-    request = compiled_model.create_infer_request()
-    tensor1 = Tensor(ov_type, input_shape)
-    tensor1.data[:] = np.array([6, 7, 8, 9, 0])
-    array1 = np.array([1, 2, 3, 4, 5], dtype=numpy_dtype)
-
-    return request, tensor1, array1
-
-
-def abs_model_with_data(device, ov_type, numpy_dtype):
-    input_shape = [1, 4]
-    param = ops.parameter(input_shape, ov_type)
-    model = Model(ops.abs(param), [param])
-    core = Core()
-    compiled_model = core.compile_model(model, device)
-
-    request = compiled_model.create_infer_request()
-
-    tensor1 = Tensor(ov_type, input_shape)
-    tensor1.data[:] = np.array([6, -7, -8, 9])
-
-    array1 = np.array([[-1, 2, 5, -3]]).astype(numpy_dtype)
-
-    return compiled_model, request, tensor1, array1
-
-
 def test_get_profiling_info(device):
     core = Core()
     param = ops.parameter([1, 3, 32, 32], np.float32, name="data")
@@ -347,7 +308,7 @@ def test_infer_mixed_keys(device, share_inputs):
 ])
 @pytest.mark.parametrize("share_inputs", [True, False])
 def test_infer_mixed_values(device, ov_type, numpy_dtype, share_inputs):
-    request, tensor1, array1 = concat_model_with_data(device, ov_type, numpy_dtype)
+    request, tensor1, array1 = generate_concat_compiled_model_with_data(device=device, ov_type=ov_type, numpy_dtype=numpy_dtype)
 
     request.infer([tensor1, array1], share_inputs=share_inputs)
 
@@ -367,7 +328,7 @@ def test_infer_mixed_values(device, ov_type, numpy_dtype, share_inputs):
 ])
 @pytest.mark.parametrize("share_inputs", [True, False])
 def test_infer_single_input(device, ov_type, numpy_dtype, share_inputs):
-    _, request, tensor1, array1 = abs_model_with_data(device, ov_type, numpy_dtype)
+    _, request, tensor1, array1 = generate_abs_compiled_model_with_data(device, ov_type, numpy_dtype)
 
     request.infer(array1, share_inputs=share_inputs)
     assert np.array_equal(request.get_output_tensor().data, np.abs(array1))
@@ -605,7 +566,7 @@ def __init__(self, array) -> None:
         def __array__(self):
             return np.array(self.data)
 
-    _, request, _, input_data = abs_model_with_data(device, Type.f32, np.single)
+    _, request, _, input_data = generate_abs_compiled_model_with_data(device, Type.f32, np.single)
     model_input_object = ArrayLikeObject(input_data.tolist())
     model_input_list = [ArrayLikeObject(input_data.tolist())]
     model_input_dict = {0: ArrayLikeObject(input_data.tolist())}
@@ -772,7 +733,7 @@ def test_not_writable_inputs_infer(device, share_inputs, input_data, change_flag
 @pytest.mark.parametrize("share_outputs", [True, False])
 @pytest.mark.parametrize("is_positional", [True, False])
 def test_compiled_model_share_memory(device, share_inputs, share_outputs, is_positional):
-    compiled, _, _, input_data = abs_model_with_data(device, Type.f32, np.float32)
+    compiled, _, _, input_data = generate_abs_compiled_model_with_data(device, Type.f32, np.float32)
 
     if is_positional:
         results = compiled(input_data, share_inputs=share_inputs, share_outputs=share_outputs)
@@ -800,7 +761,7 @@ def test_compiled_model_share_memory(device, share_inputs, share_outputs, is_pos
 @pytest.mark.parametrize("share_outputs", [True, False])
 @pytest.mark.parametrize("is_positional", [True, False])
 def test_infer_request_share_memory(device, share_inputs, share_outputs, is_positional):
-    _, request, _, input_data = abs_model_with_data(device, Type.f32, np.float32)
+    _, request, _, input_data = generate_abs_compiled_model_with_data(device, Type.f32, np.float32)
 
     if is_positional:
        results
         results = request.infer(input_data, share_inputs=share_inputs, share_outputs=share_outputs)
diff --git a/src/bindings/python/tests/utils/helpers.py b/src/bindings/python/tests/utils/helpers.py
index 098e968b64e241..2ea00484e9840c 100644
--- a/src/bindings/python/tests/utils/helpers.py
+++ b/src/bindings/python/tests/utils/helpers.py
@@ -13,7 +13,7 @@
 from pathlib import Path
 
 import openvino
-from openvino import Model, Core, Shape
+from openvino import Model, Core, Shape, Tensor, Type
 import openvino.runtime.opset13 as ops
 
@@ -256,6 +256,53 @@ def generate_model_with_memory(input_shape, data_type) -> openvino._pyopenvino.M
     return model
 
 
+def generate_concat_compiled_model(device, input_shape: List[int] = None, ov_type=Type.f32, numpy_dtype=np.float32):
+    if input_shape is None:
+        input_shape = [5]
+
+    core = Core()
+
+    params = []
+    params += [ops.parameter(input_shape, ov_type)]
+    if ov_type == Type.bf16:
+        params += [ops.parameter(input_shape, ov_type)]
+    else:
+        params += [ops.parameter(input_shape, numpy_dtype)]
+
+    model = Model(ops.concat(params, 0), params)
+    return core.compile_model(model, device)
+
+
+def generate_concat_compiled_model_with_data(device, input_shape: List[int] = None, ov_type=Type.f32, numpy_dtype=np.float32):
+    if input_shape is None:
+        input_shape = [5]
+
+    compiled_model = generate_concat_compiled_model(device, input_shape, ov_type, numpy_dtype)
+    request = compiled_model.create_infer_request()
+    tensor1 = Tensor(ov_type, input_shape)
+    tensor1.data[:] = np.array([6, 7, 8, 9, 0])
+    array1 = np.array([1, 2, 3, 4, 5], dtype=numpy_dtype)
+
+    return request, tensor1, array1
+
+
+def generate_abs_compiled_model_with_data(device, ov_type, numpy_dtype):
+    input_shape = [1, 4]
+    param = ops.parameter(input_shape, ov_type)
+    model = Model(ops.abs(param), [param])
+    core = Core()
+    compiled_model = core.compile_model(model, device)
+
+    request = compiled_model.create_infer_request()
+
+    tensor1 = Tensor(ov_type, input_shape)
+    tensor1.data[:] = np.array([6, -7, -8, 9])
+
+    array1 = np.array([[-1, 2, 5, -3]]).astype(numpy_dtype)
+
+    return compiled_model, request, tensor1, array1
+
+
 def create_filename_for_test(test_name, tmp_path, is_xml_path=False, is_bin_path=False):
     """Return a tuple with automatically generated paths for xml and bin files.
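
The helpers above consolidate the model-building code that was previously duplicated in the sync and async test files. Note the bf16 branch in generate_concat_compiled_model: NumPy has no native bfloat16 dtype, so when ov_type is Type.bf16 both parameters are created from the OpenVINO type instead of the numpy dtype. A minimal sketch of how the consolidated helper composes in a test (assumes a CPU plugin is available; values mirror those baked into the helper):

```python
import numpy as np
from openvino import Type
from tests.utils.helpers import generate_concat_compiled_model_with_data

request, tensor1, array1 = generate_concat_compiled_model_with_data(
    device="CPU", ov_type=Type.f32, numpy_dtype=np.float32,
)
request.infer([tensor1, array1])
# Concat of [6, 7, 8, 9, 0] and [1, 2, 3, 4, 5] along axis 0:
assert np.array_equal(
    request.get_output_tensor().data,
    np.array([6, 7, 8, 9, 0, 1, 2, 3, 4, 5], dtype=np.float32),
)
```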