Merge branch 'master' into mvafin/pt_fe/logging
mvafin authored Sep 23, 2024
2 parents 0c4d29f + e062d5c commit a295706
Showing 108 changed files with 9,232 additions and 5,977 deletions.
4 changes: 4 additions & 0 deletions .github/scripts/workflow_rerun/errors_to_look_for.json
@@ -74,5 +74,9 @@
{
"error_text": "json.decoder.JSONDecodeError: Unterminated string starting at",
"ticket": 151796
},
{
"error_text": "lost communication with the server",
"ticket": 152565
}
]
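The entries above pair a log substring ("error_text") with a tracking ticket, letting the rerun automation recognize known transient failures. A minimal sketch of how such a file could be consumed; the helper name and matching logic are assumptions for illustration, not the actual workflow_rerun scripts:

import json
from pathlib import Path

def find_known_error(log_text: str, known_errors: list[dict]) -> dict | None:
    # Return the first known-error entry whose error_text occurs in the log, else None.
    for entry in known_errors:
        if entry["error_text"] in log_text:
            return entry
    return None

if __name__ == "__main__":
    errors = json.loads(Path(".github/scripts/workflow_rerun/errors_to_look_for.json").read_text())
    log = "##[error] The runner has lost communication with the server."
    hit = find_known_error(log, errors)
    if hit:
        print(f"Known transient failure (ticket {hit['ticket']}) - rerun candidate.")

A real log would come from the failed job's archive; here the sample line simply contains one of the substrings listed above.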
2 changes: 1 addition & 1 deletion .github/workflows/job_openvino_js.yml
@@ -52,7 +52,7 @@ jobs:

- name: Setup Node ${{ env.NODE_VERSION }}
if: runner.os != 'Linux' # Node is already installed in the Docker image
uses: actions/setup-node@1e60f620b9541d16bece96c5465dc8ee9832be0b # v4.0.3
uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
with:
node-version: ${{ env.NODE_VERSION }}

2 changes: 1 addition & 1 deletion .github/workflows/job_python_unit_tests.yml
@@ -299,7 +299,7 @@ jobs:
python3 ${OPENVINO_REPO}/docs/articles_en/assets/snippets/main.py
- name: Python API Tests -- numpy>=2.0.0
if: ${{ fromJSON(inputs.affected-components).Python_API.test && inputs.python-version != '3.12' }} # Ticket: 152242
if: ${{ fromJSON(inputs.affected-components).Python_API.test }}
run: |
python3 -m pip uninstall -y numpy
python3 -m pip install "numpy>=2.0.0,<2.1.0"
2 changes: 1 addition & 1 deletion .github/workflows/windows_vs2019_release.yml
@@ -169,7 +169,7 @@ jobs:
path: ${{ env.OPENVINO_JS_LIBS_DIR }}

- name: Setup Node ${{ env.NODE_VERSION }}
uses: actions/setup-node@1e60f620b9541d16bece96c5465dc8ee9832be0b # v4.0.3
uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
with:
node-version: ${{ env.NODE_VERSION }}

4 changes: 2 additions & 2 deletions .gitmodules
@@ -72,8 +72,8 @@
[submodule "src/plugins/intel_cpu/thirdparty/mlas"]
path = src/plugins/intel_cpu/thirdparty/mlas
url = https://github.com/openvinotoolkit/mlas.git
[submodule "src/plugins/intel_npu/thirdparty/level-zero"]
path = src/plugins/intel_npu/thirdparty/level-zero
[submodule "thirdparty/level_zero/level-zero"]
path = thirdparty/level_zero/level-zero
url = https://github.com/oneapi-src/level-zero.git
[submodule "src/plugins/intel_npu/thirdparty/level-zero-ext"]
path = src/plugins/intel_npu/thirdparty/level-zero-ext
70 changes: 35 additions & 35 deletions docs/articles_en/about-openvino/key-features.rst
@@ -5,65 +5,65 @@ Easy Integration
#########################

| :doc:`Support for multiple frameworks <../openvino-workflow/model-preparation/convert-model-to-ir>`
| Use deep learning models from PyTorch, TensorFlow, TensorFlow Lite, PaddlePaddle, and ONNX
directly or convert them to the optimized OpenVINO IR format for improved performance.
| Use deep learning models from PyTorch, TensorFlow, TensorFlow Lite, PaddlePaddle, and ONNX
directly or convert them to the optimized OpenVINO IR format for improved performance.
| :doc:`Close integration with PyTorch <../openvino-workflow/torch-compile>`
| For PyTorch-based applications, specify OpenVINO as a backend using
:doc:`torch.compile <../openvino-workflow/torch-compile>` to improve model inference. Apply
OpenVINO optimizations to your PyTorch models directly with a single line of code.
| For PyTorch-based applications, specify OpenVINO as a backend using
:doc:`torch.compile <../openvino-workflow/torch-compile>` to improve model inference. Apply
OpenVINO optimizations to your PyTorch models directly with a single line of code.
| :doc:`GenAI Out Of The Box <../learn-openvino/llm_inference_guide/genai-guide>`
| With the genAI flavor of OpenVINO, you can run generative AI with just a couple lines of code.
Check out the GenAI guide for instructions on how to do it.
| With the genAI flavor of OpenVINO, you can run generative AI with just a couple lines of code.
Check out the GenAI guide for instructions on how to do it.
| `Python / C++ / C / NodeJS APIs <https://docs.openvino.ai/2024/api/api_reference.html>`__
| OpenVINO offers the C++ API as a complete set of available methods. For less resource-critical
solutions, the Python API provides almost full coverage, while C and NodeJS ones are limited
to the methods most basic for their typical environments. The NodeJS API, is still in its
early and active development.
| OpenVINO offers the C++ API as a complete set of available methods. For less resource-critical
solutions, the Python API provides almost full coverage, while C and NodeJS ones are limited
to the methods most basic for their typical environments. The NodeJS API, is still in its
early and active development.
| :doc:`Open source and easy to extend <../about-openvino/contributing>`
| If you need a particular feature or inference accelerator to be supported, you are free to file
a feature request or develop new components specific to your projects yourself. As open source,
OpenVINO may be used and modified freely. See the extensibility guide for more information on
how to adapt it to your needs.
| If you need a particular feature or inference accelerator to be supported, you are free to file
a feature request or develop new components specific to your projects yourself. As open source,
OpenVINO may be used and modified freely. See the extensibility guide for more information on
how to adapt it to your needs.
Deployment
#########################

| :doc:`Local or remote <../openvino-workflow>`
| Integrate the OpenVINO runtime directly with your application to run inference locally or use
`OpenVINO Model Server <https://github.com/openvinotoolkit/model_server>`__ to shift the inference
workload to a remote system, a separate server or a Kubernetes environment. For serving,
OpenVINO is also integrated with `vLLM <https://docs.vllm.ai/en/stable/getting_started/openvino-installation.html>`__
and `Triton <https://github.com/triton-inference-server/openvino_backend>`__ services.
| Integrate the OpenVINO runtime directly with your application to run inference locally or use
`OpenVINO Model Server <https://github.com/openvinotoolkit/model_server>`__ to shift the inference
workload to a remote system, a separate server or a Kubernetes environment. For serving,
OpenVINO is also integrated with `vLLM <https://docs.vllm.ai/en/stable/getting_started/openvino-installation.html>`__
and `Triton <https://github.com/triton-inference-server/openvino_backend>`__ services.
| :doc:`Scalable and portable <release-notes-openvino/system-requirements>`
| Write an application once, deploy it anywhere, always making the most out of your hardware setup.
The automatic device selection mode gives you the ultimate deployment flexibility on all major
operating systems. Check out system requirements.
| Write an application once, deploy it anywhere, always making the most out of your hardware setup.
The automatic device selection mode gives you the ultimate deployment flexibility on all major
operating systems. Check out system requirements.
| **Light-weight**
| Designed with minimal external dependencies, OpenVINO does not bloat your application
and simplifies installation and dependency management. The custom compilation for your specific
model(s) may further reduce the final binary size.
| Designed with minimal external dependencies, OpenVINO does not bloat your application
and simplifies installation and dependency management. The custom compilation for your specific
model(s) may further reduce the final binary size.
Performance
#########################

| :doc:`Model Optimization <../openvino-workflow/model-optimization>`
| Optimize your deep learning models with NNCF, using various training-time and post-training
compression methods, such as pruning, sparsity, quantization, and weight compression. Make
your models take less space, run faster, and use less resources.
| Optimize your deep learning models with NNCF, using various training-time and post-training
compression methods, such as pruning, sparsity, quantization, and weight compression. Make
your models take less space, run faster, and use less resources.
| :doc:`Top performance <../about-openvino/performance-benchmarks>`
| OpenVINO is optimized to work with Intel hardware, delivering confirmed high performance for
hundreds of models. Explore OpenVINO Performance Benchmarks to discover the optimal hardware
configurations and plan your AI deployment based on verified data.
| OpenVINO is optimized to work with Intel hardware, delivering confirmed high performance for
hundreds of models. Explore OpenVINO Performance Benchmarks to discover the optimal hardware
configurations and plan your AI deployment based on verified data.
| :doc:`Enhanced App Start-Up Time <../openvino-workflow/running-inference/optimize-inference>`
| If you need your application to launch immediately, OpenVINO will reduce first-inference latency,
running inference on CPU until a more suited device is ready to take over. Once a model
is compiled for inference, it is also cached, improving the start-up time even more.
| If you need your application to launch immediately, OpenVINO will reduce first-inference latency,
running inference on CPU until a more suited device is ready to take over. Once a model
is compiled for inference, it is also cached, improving the start-up time even more.
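The key-features text above notes that OpenVINO can be applied to PyTorch models with a single line via torch.compile. A minimal sketch of that usage, assuming torch and the openvino package (which registers the backend) are installed; the model here is only a placeholder:

import torch
import torch.nn as nn

# Any torch.nn.Module works; this tiny MLP is just a stand-in.
model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 4)).eval()

# The one-line change: route compilation through the OpenVINO backend.
compiled_model = torch.compile(model, backend="openvino")

with torch.no_grad():
    print(compiled_model(torch.randn(1, 8)).shape)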
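Similarly, the GenAI flavor mentioned above exposes a short pipeline API. A sketch under the assumption that openvino-genai is installed and model_dir points to an already converted OpenVINO model; the folder name, device, and prompt are placeholders:

import openvino_genai as ov_genai

model_dir = "TinyLlama-1.1B-Chat-v1.0-int4-ov"  # placeholder: any OpenVINO GenAI model folder
pipe = ov_genai.LLMPipeline(model_dir, "CPU")   # "GPU" works as well where available
print(pipe.generate("What is OpenVINO?", max_new_tokens=64))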
52 changes: 15 additions & 37 deletions docs/articles_en/about-openvino/performance-benchmarks.rst
@@ -11,9 +11,9 @@ Performance Benchmarks
:hidden:

Efficient LLMs for AI PC <performance-benchmarks/generative-ai-performance>
performance-benchmarks/performance-benchmarks-faq
Performance Information F.A.Q. <performance-benchmarks/performance-benchmarks-faq>
OpenVINO Accuracy <performance-benchmarks/model-accuracy-int8-fp32>
performance-benchmarks/getting-performance-numbers
Getting Performance Numbers <performance-benchmarks/getting-performance-numbers>


This page presents benchmark results for
@@ -22,7 +22,7 @@ and :doc:`OpenVINO Model Server <../ovms_what_is_openvino_model_server>`, for a
selection of public neural networks and Intel® devices. The results may help you decide which
hardware to use in your applications or plan AI workload for the hardware you have already
implemented in your solutions. Click the buttons below to see the chosen benchmark data.
For more detailed view of performance numbers for generative AI models, check the
For a more detailed view of performance numbers for generative AI models, check the
:doc:`Generative AI Benchmark Results <./performance-benchmarks/generative-ai-performance>`

.. grid:: 1 1 2 2
@@ -49,40 +49,18 @@
:material-regular:`bar_chart;1.4em` OVMS Benchmark Graphs


Please visit the tabs below for more information on key performance indicators and workload parameters.
Key performance indicators and workload parameters.

.. tab-set::

.. tab-item:: Throughput
:sync: throughput

For Vision and NLP Models this measures the number of inferences delivered within a latency threshold
(for example, number of Frames Per Second - FPS).
For GenAI (or Large Language Models) this measures the token rate after the first token aka. 2nd token
throughput rate which is presented as tokens/sec. Please click on the "Workload Parameters" tab to
learn more about input/output token lengths, etc.

.. tab-item:: Value
:sync: value

While throughput is important, what is more critical in edge AI deployments is
the performance efficiency or performance-per-cost. Application performance in
throughput per dollar of system cost is the best measure of value. The value KPI is
calculated as “Throughput measured as inferences per second / price of inference engine”.
This means for a 2 socket system 2x the price of a CPU is used. Prices are as per
date of benchmarking and sources can be found as links in the Hardware Platforms (PDF)
description below.

.. tab-item:: Efficiency
:sync: efficiency

System power is a key consideration from the edge to the data center. When selecting
deep learning solutions, power efficiency (throughput/watt) is a critical factor to
consider. Intel designs provide excellent power efficiency for running deep learning
workloads. The efficiency KPI is calculated as “Throughput measured as inferences per
second / TDP of inference engine”. This means for a 2 socket system 2x the power
dissipation (TDP) of a CPU is used. TDP-values are as per date of benchmarking and sources
can be found as links in the Hardware Platforms (PDF) description below.
(for example, number of Frames Per Second - FPS).
For GenAI (or Large Language Models) this measures the token rate after the first token aka. 2nd token
throughput rate which is presented as tokens/sec. Please click on the "Workload Parameters" tab to
learn more about input/output token lengths, etc.

.. tab-item:: Latency
:sync: latency
Expand All @@ -96,7 +74,7 @@ Please visit the tabs below for more information on key performance indicators a
example an industrial robot's response to actions in its environment or obstacle avoidance
for autonomous vehicles.
For Transformer models like Stable-Diffusion this measures the time it takes to convert the prompt
or input text into a finished image. It is presented in seconds.
or input text into a finished image. It is presented in seconds.

.. tab-item:: Workload Parameters
:sync: workloadparameters
@@ -130,21 +108,21 @@ For a listing of all platforms and configurations used for testing, refer to the

.. grid-item::

.. button-link:: ../_static/benchmarks_files/OV-2024.3-platform_list.pdf
.. button-link:: ../_static/benchmarks_files/OV-2024.4-platform_list.pdf
:color: primary
:outline:
:expand:

:material-regular:`download;1.5em` Click for Hardware Platforms [PDF]

.. button-link:: ../_static/benchmarks_files/OV-2024.3-system-info-detailed.xlsx
.. button-link:: ../_static/benchmarks_files/OV-2024.4-system-info-detailed.xlsx
:color: primary
:outline:
:expand:

:material-regular:`download;1.5em` Click for Configuration Details [XLSX]

.. button-link:: ../_static/benchmarks_files/OV-2024.3-Performance-Data.xlsx
.. button-link:: ../_static/benchmarks_files/OV-2024.4-Performance-Data.xlsx
:color: primary
:outline:
:expand:
@@ -159,9 +137,9 @@ processing) and then reports on the inferences per second (or Frames Per Second)
OpenVINO™ Model Server (OVMS) employs the Intel® Distribution of OpenVINO™ toolkit runtime
libraries and exposes a set of models via a convenient inference API over gRPC or HTTP/REST.
Its benchmark results are measured with the configuration of multiple-clients-single-server,
using two hardware platforms connected by ethernet. Network bandwidth depends on both, platforms
and models under investigation. It is set not to be a bottleneck for workload intensity. The
connection is dedicated only to measuring performance.
using two hardware platforms connected by ethernet. Network bandwidth depends on both platforms
and models used. It is set not to be a bottleneck for workload intensity. The connection is
dedicated only to measuring performance.

.. dropdown:: See more details about OVMS benchmark setup

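The Throughput tab above defines LLM throughput as the token rate after the first token, reported in tokens/sec. A small illustration of that arithmetic with made-up numbers (not measured data):

def second_token_throughput(total_time_s: float, time_to_first_token_s: float, generated_tokens: int) -> float:
    # Tokens/sec excluding the first token, per the definition in the Throughput tab.
    return (generated_tokens - 1) / (total_time_s - time_to_first_token_s)

# Example: 128 tokens generated in 3.2 s, with the first token arriving after 0.7 s.
print(round(second_token_throughput(3.2, 0.7, 128), 1))  # 50.8 tokens/sec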
@@ -25,20 +25,11 @@ running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs.
:header-rows: 1
:file: ../../_static/download/llm_models.csv

.. tab-item:: OpenVINO Model Server

.. csv-table::
:class: modeldata stripe
:name: supportedModelsTableOvms
:header-rows: 1
:file: ../../_static/download/llm_models_ovms.csv



For complete information on the system config, see:
`Hardware Platforms [PDF] <https://docs.openvino.ai/2024/_static/benchmarks_files/OV-2024.3-platform_list.pdf>`__
`Hardware Platforms [PDF] <https://docs.openvino.ai/2024/_static/benchmarks_files/OV-2024.4-platform_list.pdf>`__

To view the data in an editable form, you can download the .csv files here:
To view the data in an editable form, you can download the .csv file here:

.. grid:: 1 1 2 2
:gutter: 4
@@ -52,9 +43,4 @@ To view the data in an editable form, you can download the .csv files here:

:material-regular:`download;1.5em` Click for OpenVINO LLM results [CSV]

.. button-link:: ../../_static/download/llm_models_ovms.csv
:color: primary
:outline:
:expand:

:material-regular:`download;1.5em` Click for OpenVINO Model Server results [CSV]