Merge branch 'master' into as/npuw_online_part_tests

openvinotoolkit · Oct 2, 2024 · bf2b8d8 · bf2b8d8
2 parents f36f062 + 205c11e
commit bf2b8d8
Show file tree

Hide file tree

Showing 5 changed files with 69 additions and 36 deletions.
diff --git a/.../openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst b/.../openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst
@@ -11,6 +11,7 @@ NPU Device
    :hidden:
 
    npu-device/remote-tensor-api-npu-plugin
+   npu-device/batching-on-npu-plugin
 
 
 The Neural Processing Unit is a low-power hardware solution, introduced with the

diff --git a/...ing-inference/inference-devices-and-modes/npu-device/batching-on-npu-plugin.rst b/...ing-inference/inference-devices-and-modes/npu-device/batching-on-npu-plugin.rst
@@ -0,0 +1,37 @@
+NPU Plugin Batching 
+===============================
+
+
+.. meta::
+   :description: OpenVINO™ NPU plugin supports batching
+                 either by executing concurrent inferences or by
+                 relying on native compiler support for batching.
+
+OpenVINO™ NPU plugin supports batching either by executing concurrent inferences or by relying on native compiler support for batching.
+
+First, the NPU plugin checks if the following conditions are met:
+
+* The batch size is on the first axis.
+* All inputs and outputs have the same batch size.
+* The model does not contain states.
+
+**If the conditions are met**, the NPU plugin attempts to compile and execute the original model with batch_size forced to 1. This approach is due to current compiler limitations and ongoing work to improve performance for batch_size greater than one.
+If the compilation is successful, the plugin detects a difference in batch size between the original model layout (with a batch size set to N)
+and the transformed/compiled layout (with a batch size set to 1). Then it executes the following steps:
+
+1. Internally constructs multiple command lists, one for each input in the batch.
+2. Executes each command list for the proper offsets of input/output buffers.
+3. Notifies the user of the completion of the inference request after all command lists have been executed.
+
+This concurrency-based batching mode is transparent to the application. A single inference request handles all inputs from the batch.
+While performance may be lower compared to regular batching (based on native compiler support), this mode provides basic batching functionality for use either with older drivers
+or when the model cannot yet be compiled with a batch size larger than one.
+
+**If the conditions are not met**, the NPU plugin tries to compile and execute the original model with the given
+batch_size of N, like any other regular model.
+
+.. note::
+
+   With future performance improvements and support for compiling multiple models with a batch size larger
+   than one, the default order will change. The NPU plugin will first try to compile and execute the original
+   model with the given batch size and will fall back to concurrent batching if compilation fails.
diff --git a/src/bindings/js/node/lib/addon.ts b/src/bindings/js/node/lib/addon.ts
@@ -21,6 +21,8 @@ type elementTypeString =
   | 'f32'
   | 'string';
 
+type OVAny = string | number | boolean;
+
 /**
  * Core represents an OpenVINO runtime Core entity.
  *
@@ -48,7 +50,7 @@ interface Core {
   compileModel(
     model: Model,
     deviceName: string,
-    config?: { [propertyName: string]: string },
+    config?: Record<string, OVAny>,
   ): Promise<CompiledModel>;
   /**
    * Asynchronously reads a model and creates a compiled model
@@ -67,7 +69,7 @@ interface Core {
   compileModel(
     modelPath: string,
     deviceName: string,
-    config?: { [propertyName: string]: string },
+    config?: Record<string, OVAny>,
   ): Promise<CompiledModel>;
   /**
    * A synchronous version of {@link Core.compileModel}.
@@ -76,7 +78,7 @@ interface Core {
   compileModelSync(
     model: Model,
     deviceName: string,
-    config?: { [propertyName: string]: string },
+    config?: Record<string, OVAny>,
   ): CompiledModel;
   /**
    * A synchronous version of {@link Core.compileModel}.
@@ -85,7 +87,7 @@ interface Core {
   compileModelSync(
     modelPath: string,
     deviceName: string,
-    config?: { [propertyName: string]: string },
+    config?: Record<string, OVAny>,
   ): CompiledModel;
   /**
    * It returns a list of available inference devices.
@@ -101,7 +103,7 @@ interface Core {
    * It gets the properties dedicated to device behaviour.
    * @param propertyName A property name.
    */
-  getProperty(propertyName: string): string | number | boolean;
+  getProperty(propertyName: string): OVAny;
 
   /**
    * It gets the properties dedicated to device behaviour.
@@ -111,7 +113,7 @@ interface Core {
   getProperty(
     deviceName: string,
     propertyName: string,
-  ): string | number | boolean;
+  ): OVAny;
   /**
    * It returns information on the version of device plugins.
    * @param deviceName A device name to identify a plugin.
@@ -135,7 +137,7 @@ interface Core {
   importModel(
     modelStream: Buffer,
     device: string,
-    config?: { [key: string]: string | number | boolean },
+    config?: Record<string, OVAny>,
   ): Promise<CompiledModel>;
   /**
    * A synchronous version of {@link Core.importModel}.
@@ -144,7 +146,7 @@ interface Core {
   importModelSync(
     modelStream: Buffer,
     device: string,
-    config?: { [key: string]: string | number | boolean },
+    config?: Record<string, OVAny>,
   ): CompiledModel;
   /**
    * It reads models from the IR / ONNX / PDPD / TF and TFLite formats.
@@ -197,16 +199,13 @@ interface Core {
    * It sets the properties.
    * @param properties An object with the property name - property value pairs.
    */
-  setProperty(properties: { [key: string]: string | number | boolean }): void;
+  setProperty(properties: Record<string, OVAny>): void;
   /**
    * It sets the properties for a device.
    * @param deviceName The name of a device.
    * @param properties An object with the property name - property value pairs.
    */
-  setProperty(
-    deviceName: string,
-    properties: { [key: string]: string | number | boolean },
-  ): void;
+  setProperty(deviceName: string, properties: Record<string, OVAny>): void;
   /**
    * It queries the device if it supports specified model with the specified
    * properties.
@@ -218,8 +217,8 @@ interface Core {
   queryModel(
     model: Model,
     deviceName: string,
-    properties?: {[key: string]: string | number | boolean},
-  ): {[key: string]: string | number | boolean};
+    properties?: Record<string, OVAny>,
+  ): { [key: string]: string };
 }
 interface CoreConstructor {
   new (): Core;
@@ -325,7 +324,7 @@ interface CompiledModel {
    * @param propertyName A string to get the property value.
    * @returns The property value.
    */
-  getProperty(propertyName: string): string | number | boolean;
+  getProperty(propertyName: string): OVAny;
   /**
    * It creates an inference request object used to infer the compiled model.
    * @return {InferRequest}
@@ -380,9 +379,7 @@ interface CompiledModel {
    * @param property An object with the key-value pairs.
    * (property name, property value)
    */
-  setProperty(properties: {
-    [propertyName: string]: string | number | boolean;
-  }): void;
+  setProperty(properties: Record<string, OVAny>): void;
 }
 
 /**

diff --git a/src/bindings/js/node/tests/unit/core.test.js b/src/bindings/js/node/tests/unit/core.test.js
@@ -12,11 +12,11 @@ describe('ov.Core tests', () => {
   before(async () => {
     await isModelAvailable(testModels.testModelFP32);
   });
- 
+
   beforeEach(() => {
     core = new ov.Core();
   });
-  
+
   it('Core.setProperty()', () => {
     const tmpDir = '/tmp';
 
@@ -83,29 +83,29 @@ describe('ov.Core tests', () => {
   it('Core.queryModel() with empty parameters should throw an error', () => {
     assert.throws(
       () => core.queryModel().then(),
-      /'queryModel' method called with incorrect parameters./
-    )
+      /'queryModel' method called with incorrect parameters./,
+    );
   });
 
   it('Core.queryModel() with less arguments should throw an error', () => {
     assert.throws(
-      () => core.queryModel("Unexpected Argument").then(),
-      /'queryModel' method called with incorrect parameters./
-    )
+      () => core.queryModel('Unexpected Argument').then(),
+      /'queryModel' method called with incorrect parameters./,
+    );
   });
 
   it('Core.queryModel() with incorrect arguments should throw an error', () => {
     const model = core.readModelSync(getModelPath().xml);
     assert.throws(
-      () => core.queryModel(model, "arg1", "arg2").then(),
-      /'queryModel' method called with incorrect parameters./
-    )
+      () => core.queryModel(model, 'arg1', 'arg2').then(),
+      /'queryModel' method called with incorrect parameters./,
+    );
   });
 
   it('Core.queryModel() should have device in the result values', () => {
     const model = core.readModelSync(getModelPath().xml);
     const device = 'CPU';
-    const query_model = core.queryModel(model, device);
-    assert(Object.values(query_model).includes(device));
+    const queryModel = core.queryModel(model, device);
+    assert(Object.values(queryModel).includes(device));
   });
 });
diff --git a/src/core/src/bound_evaluate.cpp b/src/core/src/bound_evaluate.cpp
@@ -494,14 +494,12 @@ bool ov::interval_bound_evaluator(const Node* node,
             vector_of_output_variants.emplace_back(output.get_element_type(), output.get_shape());
         }
 
-        node->evaluate(vector_of_output_variants, input_variant);
+        if (!node->evaluate(vector_of_output_variants, input_variant)) {
+            return false;
+        };
 
         TensorVector vector_of_unsqueezed_output_variants;
         for (const auto& output : vector_of_output_variants) {
-            if (!output) {
-                return false;
-            }
-
             auto unsqueezed_shape = output.get_shape();
             unsqueezed_shape.insert(unsqueezed_shape.begin(), 1);