diff --git a/src/bindings/python/src/pyopenvino/core/compiled_model.cpp b/src/bindings/python/src/pyopenvino/core/compiled_model.cpp
index d8163ce3a91005..1a460e83eef00d 100644
--- a/src/bindings/python/src/pyopenvino/core/compiled_model.cpp
+++ b/src/bindings/python/src/pyopenvino/core/compiled_model.cpp
@@ -177,6 +177,16 @@ void regclass_CompiledModel(py::module m) {
         :rtype: openvino.runtime.Model
     )");
 
+    cls.def("release_memory",
+            &ov::CompiledModel::release_memory,
+            py::call_guard<py::gil_scoped_release>(),
+            R"(
+        Release intermediate memory.
+
+        This method forces the compiled model to release memory allocated for intermediate structures,
+        e.g. caches, tensors, and temporary buffers, when possible.
+    )");
+
     cls.def_property_readonly("inputs",
                               &ov::CompiledModel::inputs,
                               R"(
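For orientation, the new binding can be exercised from Python roughly as below. This is a minimal sketch, not part of the change: the model path, device name, f32 input type, and random input data are placeholders.

```python
import numpy as np
import openvino as ov

core = ov.Core()
# Placeholder model and device; any compiled model exposes release_memory().
compiled_model = core.compile_model("model.xml", "CPU")
request = compiled_model.create_infer_request()

# Assumes a static-shape f32 input for illustration.
data = np.random.rand(*compiled_model.inputs[0].get_shape()).astype(np.float32)
request.infer({0: data})

# Free caches and temporary buffers while the model is idle; the compiled
# model stays valid and the next inference re-allocates what it needs.
compiled_model.release_memory()
request.infer({0: data})
```
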
diff --git a/src/bindings/python/tests/test_runtime/test_async_infer_request.py b/src/bindings/python/tests/test_runtime/test_async_infer_request.py
index ea1bf1a9f2a4af..f715d7b7b1238e 100644
--- a/src/bindings/python/tests/test_runtime/test_async_infer_request.py
+++ b/src/bindings/python/tests/test_runtime/test_async_infer_request.py
@@ -19,46 +19,12 @@
     Tensor,
 )
 from tests import skip_need_mock_op
-from tests.utils.helpers import generate_image, get_relu_model
-
-
-def concat_model_with_data(device, ov_type, numpy_dtype):
-    core = Core()
-
-    input_shape = [5]
-
-    params = []
-    params += [ops.parameter(input_shape, ov_type)]
-    if ov_type == Type.bf16:
-        params += [ops.parameter(input_shape, ov_type)]
-    else:
-        params += [ops.parameter(input_shape, numpy_dtype)]
-
-    model = Model(ops.concat(params, 0), params)
-    compiled_model = core.compile_model(model, device)
-    request = compiled_model.create_infer_request()
-    tensor1 = Tensor(ov_type, input_shape)
-    tensor1.data[:] = np.array([6, 7, 8, 9, 0])
-    array1 = np.array([1, 2, 3, 4, 5], dtype=numpy_dtype)
-
-    return request, tensor1, array1
-
-
-def abs_model_with_data(device, ov_type, numpy_dtype):
-    input_shape = [1, 4]
-    param = ops.parameter(input_shape, ov_type)
-    model = Model(ops.abs(param), [param])
-    core = Core()
-    compiled_model = core.compile_model(model, device)
-
-    request = compiled_model.create_infer_request()
-
-    tensor1 = Tensor(ov_type, input_shape)
-    tensor1.data[:] = np.array([6, -7, -8, 9])
-
-    array1 = np.array([[-1, 2, 5, -3]]).astype(numpy_dtype)
-
-    return compiled_model, request, tensor1, array1
+from tests.utils.helpers import (
+    generate_image,
+    get_relu_model,
+    generate_concat_compiled_model_with_data,
+    generate_abs_compiled_model_with_data,
+)
 
 
 @pytest.mark.parametrize("share_inputs", [True, False])
@@ -360,7 +326,7 @@ def callback(callbacks_info):
 ])
 @pytest.mark.parametrize("share_inputs", [True, False])
 def test_async_mixed_values(device, ov_type, numpy_dtype, share_inputs):
-    request, tensor1, array1 = concat_model_with_data(device, ov_type, numpy_dtype)
+    request, tensor1, array1 = generate_concat_compiled_model_with_data(device=device, ov_type=ov_type, numpy_dtype=numpy_dtype)
 
     request.start_async([tensor1, array1], share_inputs=share_inputs)
     request.wait()
@@ -380,7 +346,7 @@ def test_async_mixed_values(device, ov_type, numpy_dtype, share_inputs):
 ])
 @pytest.mark.parametrize("share_inputs", [True, False])
 def test_async_single_input(device, ov_type, numpy_dtype, share_inputs):
-    _, request, tensor1, array1 = abs_model_with_data(device, ov_type, numpy_dtype)
+    _, request, tensor1, array1 = generate_abs_compiled_model_with_data(device, ov_type, numpy_dtype)
 
     request.start_async(array1, share_inputs=share_inputs)
     request.wait()
@@ -401,7 +367,7 @@ def __init__(self, array) -> None:
         def __array__(self):
             return np.array(self.data)
 
-    _, request, _, input_data = abs_model_with_data(device, Type.f32, np.single)
+    _, request, _, input_data = generate_abs_compiled_model_with_data(device, Type.f32, np.single)
     model_input_object = ArrayLikeObject(input_data.tolist())
     model_input_list = [ArrayLikeObject(input_data.tolist())]
     # Test single array-like object in InferRequest().start_async()
diff --git a/src/bindings/python/tests/test_runtime/test_compiled_model.py b/src/bindings/python/tests/test_runtime/test_compiled_model.py
index f7e84daf281485..d6909fa94711d3 100644
--- a/src/bindings/python/tests/test_runtime/test_compiled_model.py
+++ b/src/bindings/python/tests/test_runtime/test_compiled_model.py
@@ -2,7 +2,6 @@
 # Copyright (C) 2018-2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-import sys
 import pytest
 import numpy as np
 
@@ -10,6 +9,7 @@
     get_relu_model,
     generate_image,
     generate_model_and_image,
+    generate_concat_compiled_model,
     generate_relu_compiled_model,
     generate_relu_compiled_model_with_config,
     encrypt_base64,
@@ -18,6 +18,7 @@
 
 from openvino import Model, Shape, Core, Tensor, serialize
 from openvino.runtime import ConstOutput
+import openvino.runtime.opset13 as ops
 
 import openvino.properties as props
 
@@ -276,3 +277,15 @@ def test_compiled_model_from_buffer_in_memory(request, tmp_path, device):
         compiled = core.compile_model(model=xml, weights=weights, device_name=device)
 
     _ = compiled([np.random.normal(size=list(input.shape)).astype(dtype=input.get_element_type().to_dtype()) for input in compiled.inputs])
+
+
+def test_memory_release(device):
+    compiled_model = generate_concat_compiled_model(device)
+    request = compiled_model.create_infer_request()
+
+    input_tensor = Tensor(compiled_model.inputs[0].get_element_type(), compiled_model.inputs[0].get_shape())
+    request.infer({0: input_tensor, 1: input_tensor})
+
+    # Release memory and perform inference again
+    compiled_model.release_memory()
+    request.infer({0: input_tensor, 1: input_tensor})
diff --git a/src/bindings/python/tests/test_runtime/test_sync_infer_request.py b/src/bindings/python/tests/test_runtime/test_sync_infer_request.py
index de30955e6c7204..e97f8e6530cadc 100644
--- a/src/bindings/python/tests/test_runtime/test_sync_infer_request.py
+++ b/src/bindings/python/tests/test_runtime/test_sync_infer_request.py
@@ -25,17 +25,17 @@
 from openvino.runtime import ProfilingInfo
 from openvino.preprocess import PrePostProcessor
 
-from tests.utils.helpers import generate_image, get_relu_model
+from tests.utils.helpers import (
+    generate_image,
+    get_relu_model,
+    generate_concat_compiled_model_with_data,
+    generate_add_compiled_model,
+    generate_abs_compiled_model_with_data,
+)
 
 
 def create_simple_request_and_inputs(device):
-    input_shape = [2, 2]
-    param_a = ops.parameter(input_shape, np.float32)
-    param_b = ops.parameter(input_shape, np.float32)
-    model = Model(ops.add(param_a, param_b), [param_a, param_b])
-
-    core = Core()
-    compiled_model = core.compile_model(model, device)
+    compiled_model = generate_add_compiled_model(device, input_shape=[2, 2])
     request = compiled_model.create_infer_request()
 
     arr_1 = np.array([[1, 2], [3, 4]], dtype=np.float32)
@@ -44,45 +44,6 @@ def create_simple_request_and_inputs(device):
     return request, arr_1, arr_2
 
 
-def concat_model_with_data(device, ov_type, numpy_dtype):
-    core = Core()
-
-    input_shape = [5]
-
-    params = []
-    params += [ops.parameter(input_shape, ov_type)]
-    if ov_type == Type.bf16:
-        params += [ops.parameter(input_shape, ov_type)]
-    else:
-        params += [ops.parameter(input_shape, numpy_dtype)]
-
-    model = Model(ops.concat(params, 0), params)
-    compiled_model = core.compile_model(model, device)
-    request = compiled_model.create_infer_request()
-    tensor1 = Tensor(ov_type, input_shape)
-    tensor1.data[:] = np.array([6, 7, 8, 9, 0])
-    array1 = np.array([1, 2, 3, 4, 5], dtype=numpy_dtype)
-
-    return request, tensor1, array1
-
-
-def abs_model_with_data(device, ov_type, numpy_dtype):
-    input_shape = [1, 4]
-    param = ops.parameter(input_shape, ov_type)
-    model = Model(ops.abs(param), [param])
-    core = Core()
-    compiled_model = core.compile_model(model, device)
-
-    request = compiled_model.create_infer_request()
-
-    tensor1 = Tensor(ov_type, input_shape)
-    tensor1.data[:] = np.array([6, -7, -8, 9])
-
-    array1 = np.array([[-1, 2, 5, -3]]).astype(numpy_dtype)
-
-    return compiled_model, request, tensor1, array1
-
-
 def test_get_profiling_info(device):
     core = Core()
     param = ops.parameter([1, 3, 32, 32], np.float32, name="data")
@@ -347,7 +308,7 @@ def test_infer_mixed_keys(device, share_inputs):
 ])
 @pytest.mark.parametrize("share_inputs", [True, False])
 def test_infer_mixed_values(device, ov_type, numpy_dtype, share_inputs):
-    request, tensor1, array1 = concat_model_with_data(device, ov_type, numpy_dtype)
+    request, tensor1, array1 = generate_concat_compiled_model_with_data(device=device, ov_type=ov_type, numpy_dtype=numpy_dtype)
 
     request.infer([tensor1, array1], share_inputs=share_inputs)
 
@@ -367,7 +328,7 @@ def test_infer_mixed_values(device, ov_type, numpy_dtype, share_inputs):
 ])
 @pytest.mark.parametrize("share_inputs", [True, False])
 def test_infer_single_input(device, ov_type, numpy_dtype, share_inputs):
-    _, request, tensor1, array1 = abs_model_with_data(device, ov_type, numpy_dtype)
+    _, request, tensor1, array1 = generate_abs_compiled_model_with_data(device, ov_type, numpy_dtype)
 
     request.infer(array1, share_inputs=share_inputs)
     assert np.array_equal(request.get_output_tensor().data, np.abs(array1))
@@ -605,7 +566,7 @@ def __init__(self, array) -> None:
         def __array__(self):
             return np.array(self.data)
 
-    _, request, _, input_data = abs_model_with_data(device, Type.f32, np.single)
+    _, request, _, input_data = generate_abs_compiled_model_with_data(device, Type.f32, np.single)
     model_input_object = ArrayLikeObject(input_data.tolist())
     model_input_list = [ArrayLikeObject(input_data.tolist())]
     model_input_dict = {0: ArrayLikeObject(input_data.tolist())}
@@ -772,7 +733,7 @@ def test_not_writable_inputs_infer(device, share_inputs, input_data, change_flag
 @pytest.mark.parametrize("share_outputs", [True, False])
 @pytest.mark.parametrize("is_positional", [True, False])
 def test_compiled_model_share_memory(device, share_inputs, share_outputs, is_positional):
-    compiled, _, _, input_data = abs_model_with_data(device, Type.f32, np.float32)
+    compiled, _, _, input_data = generate_abs_compiled_model_with_data(device, Type.f32, np.float32)
 
     if is_positional:
         results = compiled(input_data, share_inputs=share_inputs, share_outputs=share_outputs)
@@ -800,7 +761,7 @@ def test_compiled_model_share_memory(device, share_inputs, share_outputs, is_pos
 @pytest.mark.parametrize("share_outputs", [True, False])
 @pytest.mark.parametrize("is_positional", [True, False])
 def test_infer_request_share_memory(device, share_inputs, share_outputs, is_positional):
-    _, request, _, input_data = abs_model_with_data(device, Type.f32, np.float32)
+    _, request, _, input_data = generate_abs_compiled_model_with_data(device, Type.f32, np.float32)
 
     if is_positional:
        results
         results = request.infer(input_data, share_inputs=share_inputs, share_outputs=share_outputs)
diff --git a/src/bindings/python/tests/utils/helpers.py b/src/bindings/python/tests/utils/helpers.py
index 098e968b64e241..2ea00484e9840c 100644
--- a/src/bindings/python/tests/utils/helpers.py
+++ b/src/bindings/python/tests/utils/helpers.py
@@ -13,7 +13,7 @@
 from pathlib import Path
 
 import openvino
-from openvino import Model, Core, Shape
+from openvino import Model, Core, Shape, Tensor, Type
 import openvino.runtime.opset13 as ops
 
@@ -256,6 +256,53 @@ def generate_model_with_memory(input_shape, data_type) -> openvino._pyopenvino.M
     return model
 
 
+def generate_concat_compiled_model(device, input_shape: List[int] = None, ov_type=Type.f32, numpy_dtype=np.float32):
+    if input_shape is None:
+        input_shape = [5]
+
+    core = Core()
+
+    params = []
+    params += [ops.parameter(input_shape, ov_type)]
+    if ov_type == Type.bf16:
+        params += [ops.parameter(input_shape, ov_type)]
+    else:
+        params += [ops.parameter(input_shape, numpy_dtype)]
+
+    model = Model(ops.concat(params, 0), params)
+    return core.compile_model(model, device)
+
+
+def generate_concat_compiled_model_with_data(device, input_shape: List[int] = None, ov_type=Type.f32, numpy_dtype=np.float32):
+    if input_shape is None:
+        input_shape = [5]
+
+    compiled_model = generate_concat_compiled_model(device, input_shape, ov_type, numpy_dtype)
+    request = compiled_model.create_infer_request()
+    tensor1 = Tensor(ov_type, input_shape)
+    tensor1.data[:] = np.array([6, 7, 8, 9, 0])
+    array1 = np.array([1, 2, 3, 4, 5], dtype=numpy_dtype)
+
+    return request, tensor1, array1
+
+
+def generate_abs_compiled_model_with_data(device, ov_type, numpy_dtype):
+    input_shape = [1, 4]
+    param = ops.parameter(input_shape, ov_type)
+    model = Model(ops.abs(param), [param])
+    core = Core()
+    compiled_model = core.compile_model(model, device)
+
+    request = compiled_model.create_infer_request()
+
+    tensor1 = Tensor(ov_type, input_shape)
+    tensor1.data[:] = np.array([6, -7, -8, 9])
+
+    array1 = np.array([[-1, 2, 5, -3]]).astype(numpy_dtype)
+
+    return compiled_model, request, tensor1, array1
+
+
 def create_filename_for_test(test_name, tmp_path, is_xml_path=False, is_bin_path=False):
     """Return a tuple with automatically generated paths for xml and bin files.
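
The helpers above consolidate the model-building code that was previously duplicated in the sync and async test files. Note the bf16 branch in generate_concat_compiled_model: NumPy has no native bfloat16 dtype, so when ov_type is Type.bf16 both parameters are created from the OpenVINO type instead of the numpy dtype. A minimal sketch of how the consolidated helper composes in a test (assumes a CPU plugin is available; values mirror those baked into the helper):

```python
import numpy as np
from openvino import Type
from tests.utils.helpers import generate_concat_compiled_model_with_data

request, tensor1, array1 = generate_concat_compiled_model_with_data(
    device="CPU", ov_type=Type.f32, numpy_dtype=np.float32,
)
request.infer([tensor1, array1])
# Concat of [6, 7, 8, 9, 0] and [1, 2, 3, 4, 5] along axis 0:
assert np.array_equal(
    request.get_output_tensor().data,
    np.array([6, 7, 8, 9, 0, 1, 2, 3, 4, 5], dtype=np.float32),
)
```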