diff --git a/.github/actions/smart-ci/action.yml b/.github/actions/smart-ci/action.yml index 007db90d13251b..cd111d617ddc1b 100644 --- a/.github/actions/smart-ci/action.yml +++ b/.github/actions/smart-ci/action.yml @@ -43,6 +43,10 @@ inputs: description: "Comma-separated list of patterns (fnmatch-style). If PR has only matching files changed, return indicator that CI can be skipped" required: false + enable_for_org: + description: "Enables running workflows for a given organization; triggers from other orgs are skipped" + required: false + default: "openvinotoolkit" outputs: all_components: @@ -99,6 +103,7 @@ runs: -c "${{ inputs.components_config }}" \ -m "${{ inputs.components_config_schema }}" \ -l "${{ inputs.labeler_config }}" \ + --enable_for_org "${{ inputs.enable_for_org }}" \ --skip-when-only-listed-labels-set "${{ inputs.skip_when_only_listed_labels_set }}" \ --skip-when-only-listed-files-changed "${{ inputs.skip_when_only_listed_files_changed }}" shell: bash diff --git a/.github/actions/smart-ci/smart_ci.py b/.github/actions/smart-ci/smart_ci.py index 1c8558f4779108..e922d3d2ef5263 100644 --- a/.github/actions/smart-ci/smart_ci.py +++ b/.github/actions/smart-ci/smart_ci.py @@ -4,6 +4,8 @@ import os import re import argparse +import sys + import yaml import json import jsonschema @@ -146,6 +148,8 @@ def parse_args(): parser.add_argument('--skip-when-only-listed-files-changed', help="Comma-separated list of patterns (fnmatch-style). 
If PR has only matching files changed, " "return indicator that CI can be skipped") + parser.add_argument('--enable_for_org', default='openvinotoolkit', + help='Enable running workflows for a given organization; triggers from other orgs are skipped') args = parser.parse_args() return args @@ -176,6 +180,13 @@ def main(): components_config = yaml.safe_load(config) owner, repository = args.repo.split('/') + + if owner != args.enable_for_org: + logger.info(f"Running workflows is enabled only for repos in {args.enable_for_org} organization. " + f"The current workflow was initiated from other org: {owner}, skipping") + set_github_output("skip_workflow", "True") + sys.exit(0) + gh_api = GhApi(owner=owner, repo=repository, token=os.getenv("GITHUB_TOKEN")) pr = gh_api.pulls.get(args.pr) if args.pr else None diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index 35c18b43e1f95d..bbd737dff0e569 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -43,6 +43,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -95,7 +96,6 @@ jobs: VCPKG_DEFAULT_BINARY_CACHE: '/mount/caches/ccache/android_arm64/vcpkg_cache' VCPKG_FORCE_SYSTEM_BINARIES: '1' SCCACHE_AZURE_KEY_PREFIX: android_arm64 - if: "!needs.smart_ci.outputs.skip_workflow" steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/android_x64.yml b/.github/workflows/android_x64.yml index e8fb4902e37612..3504b8cf2fdeb1 100644 --- a/.github/workflows/android_x64.yml +++ b/.github/workflows/android_x64.yml @@ -46,6 +46,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -94,7 +95,6 @@ jobs: 
ANDROID_SDK_VERSION: 29 ANDROID_ABI_CONFIG: x86_64 SCCACHE_AZURE_KEY_PREFIX: android_x64 - if: "!needs.smart_ci.outputs.skip_workflow" steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/assign_issue.yml b/.github/workflows/assign_issue.yml index f466715f5cfcd3..b13c6736e27b0b 100644 --- a/.github/workflows/assign_issue.yml +++ b/.github/workflows/assign_issue.yml @@ -15,6 +15,7 @@ jobs: permissions: issues: write timeout-minutes: 10 + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: take an issue uses: bdougie/take-action@1439165ac45a7461c2d89a59952cd7d941964b87 # v1.6.1 diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index 7b380530cfaecd..d6d3a63e431ecd 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -15,6 +15,7 @@ permissions: read-all jobs: Build_Doc: runs-on: ubuntu-20.04 + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/check_pr_commits.yml b/.github/workflows/check_pr_commits.yml index 75d18695c6e2f9..690b85046a108b 100644 --- a/.github/workflows/check_pr_commits.yml +++ b/.github/workflows/check_pr_commits.yml @@ -6,6 +6,7 @@ permissions: read-all jobs: Checks: runs-on: ubuntu-22.04 + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/cleanup_caches.yml b/.github/workflows/cleanup_caches.yml index 3f1c413253bdb5..d198f709e58316 100644 --- a/.github/workflows/cleanup_caches.yml +++ b/.github/workflows/cleanup_caches.yml @@ -12,6 +12,7 @@ jobs: Cleanup_PIP: name: Cleanup PIP cache runs-on: aks-linux-2-cores-8gb + if: ${{ github.repository_owner == 'openvinotoolkit' }} container: image: 
openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 volumes: @@ -38,6 +39,7 @@ jobs: Cleanup_ccache_lin: name: Cleanup Linux ccache runs-on: aks-linux-2-cores-8gb + if: ${{ github.repository_owner == 'openvinotoolkit' }} container: image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 volumes: diff --git a/.github/workflows/code_snippets.yml b/.github/workflows/code_snippets.yml index ae5f9ee25624d3..82daec9ee791f9 100644 --- a/.github/workflows/code_snippets.yml +++ b/.github/workflows/code_snippets.yml @@ -25,6 +25,7 @@ jobs: matrix: os: ['ubuntu-22.04', 'macos-latest', 'windows-latest'] runs-on: ${{ matrix.os }} + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index 2fbcc6b5f87761..c2db68edca3956 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -12,6 +12,7 @@ jobs: runs-on: ubuntu-22.04 permissions: pull-requests: write + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 with: diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 1d2f8e3ff54820..0de4cb045bfeb4 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -31,6 +31,7 @@ jobs: run: shell: bash runs-on: aks-linux-16-cores-32gb + if: ${{ github.repository_owner == 'openvinotoolkit' }} container: image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 env: diff --git a/.github/workflows/debian_10_arm.yml b/.github/workflows/debian_10_arm.yml index 84496a50b9a480..7fab775b45b886 100644 --- a/.github/workflows/debian_10_arm.yml +++ b/.github/workflows/debian_10_arm.yml @@ -48,6 +48,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-16-cores-arm-docker-build container: 
image: openvinogithubactions.azurecr.io/docker_build:0.2 diff --git a/.github/workflows/dependency_review.yml b/.github/workflows/dependency_review.yml index fd2d4f02d57368..e73acd765ed7f9 100644 --- a/.github/workflows/dependency_review.yml +++ b/.github/workflows/dependency_review.yml @@ -6,6 +6,7 @@ permissions: read-all jobs: dependency-review: runs-on: ubuntu-latest + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/fedora_29.yml b/.github/workflows/fedora_29.yml index a79b0f86af28f3..b3a540fb287bd8 100644 --- a/.github/workflows/fedora_29.yml +++ b/.github/workflows/fedora_29.yml @@ -48,6 +48,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -71,7 +72,6 @@ jobs: Build: needs: [Docker, Smart_CI] - if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: runner: 'aks-linux-16-cores-32gb' diff --git a/.github/workflows/files_size.yml b/.github/workflows/files_size.yml index 065fe71bc75893..6a006cc7d4ada8 100644 --- a/.github/workflows/files_size.yml +++ b/.github/workflows/files_size.yml @@ -10,6 +10,7 @@ permissions: read-all jobs: Check_Files_Size: runs-on: ubuntu-22.04 + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 7c0282b5519705..2e557ebc5ef477 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -52,6 +52,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-16-cores-arm-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -75,7 +76,6 @@ jobs: 
Build: needs: [ Docker, Smart_CI ] - if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: runner: 'aks-linux-16-cores-arm' diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index f0ce141e8c004f..f9359e323d4baf 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -53,6 +53,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -104,7 +105,7 @@ jobs: SELECTIVE_BUILD_STAT_DIR: /__w/openvino/openvino/selective_build_stat MODELS_PATH: /__w/openvino/openvino/testdata SCCACHE_AZURE_KEY_PREFIX: ubuntu22_x86_64_itt_clang_Release_faster_build - if: ${{ !needs.smart_ci.outputs.skip_workflow && github.event_name != 'merge_group' }} + if: ${{ github.event_name != 'merge_group' }} steps: - name: Clone OpenVINO diff --git a/.github/workflows/linux_riscv.yml b/.github/workflows/linux_riscv.yml index 6c26bb8b3859f0..3de8b56ad773cc 100644 --- a/.github/workflows/linux_riscv.yml +++ b/.github/workflows/linux_riscv.yml @@ -44,6 +44,7 @@ jobs: Docker: needs: Smart_CI runs-on: aks-linux-4-cores-16gb-docker-build + if: "!needs.smart_ci.outputs.skip_workflow" container: image: openvinogithubactions.azurecr.io/docker_build:0.2 volumes: @@ -88,7 +89,7 @@ jobs: CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp CCACHE_MAXSIZE: 2G - if: ${{ !needs.smart_ci.outputs.skip_workflow && github.event_name != 'merge_group' }} + if: ${{ github.event_name != 'merge_group' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index 9105b3b00bf84c..367fce8eb98683 100644 --- a/.github/workflows/linux_sanitizers.yml +++ 
b/.github/workflows/linux_sanitizers.yml @@ -25,6 +25,7 @@ jobs: run: shell: bash runs-on: aks-linux-16-cores-32gb + if: ${{ github.repository_owner == 'openvinotoolkit' }} container: image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 volumes: diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 715380811d6870..d60ef4608093b2 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -78,6 +78,7 @@ jobs: INSTALL_DIR_JS: ${{ github.workspace }}/openvino_install/js INSTALL_TEST_DIR: ${{ github.workspace }}/tests_install BUILD_DIR: ${{ github.workspace }}/build + if: "!needs.smart_ci.outputs.skip_workflow" steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 2615fe16316ea7..73890d1284222e 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -78,6 +78,7 @@ jobs: INSTALL_DIR_JS: ${{ github.workspace }}/openvino_install/js INSTALL_TEST_DIR: ${{ github.workspace }}/tests_install BUILD_DIR: ${{ github.workspace }}/build + if: "!needs.smart_ci.outputs.skip_workflow" steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/mo.yml b/.github/workflows/mo.yml index 7bbf3ba28001b9..9a112e7e53ced2 100644 --- a/.github/workflows/mo.yml +++ b/.github/workflows/mo.yml @@ -21,6 +21,7 @@ permissions: read-all jobs: Pylint-UT: runs-on: ubuntu-22.04 + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/ovc.yml b/.github/workflows/ovc.yml index a296f93e5a5187..1e2668f26cb579 100644 --- a/.github/workflows/ovc.yml +++ b/.github/workflows/ovc.yml @@ -16,6 +16,7 @@ permissions: read-all jobs: Pylint-UT: runs-on: ubuntu-22.04 + if: ${{ github.repository_owner == 
'openvinotoolkit' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/py_checks.yml b/.github/workflows/py_checks.yml index db0918d0eb61c0..75a8a1b83f03d0 100644 --- a/.github/workflows/py_checks.yml +++ b/.github/workflows/py_checks.yml @@ -25,6 +25,7 @@ permissions: read-all jobs: linters: runs-on: ubuntu-20.04 + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/send_workflows_to_opentelemetry.yml b/.github/workflows/send_workflows_to_opentelemetry.yml index 8f70389e645906..ef597e55858b0e 100644 --- a/.github/workflows/send_workflows_to_opentelemetry.yml +++ b/.github/workflows/send_workflows_to_opentelemetry.yml @@ -37,7 +37,7 @@ jobs: otel-export-trace: name: Export finished workflow metrics runs-on: aks-linux-2-cores-8gb - if: github.repository == 'openvinotoolkit/openvino' + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Checkout diff --git a/.github/workflows/stale_prs_and_issues.yml b/.github/workflows/stale_prs_and_issues.yml index 395fc6a350e2ba..d246a7e83f4f73 100644 --- a/.github/workflows/stale_prs_and_issues.yml +++ b/.github/workflows/stale_prs_and_issues.yml @@ -12,6 +12,7 @@ jobs: issues: write pull-requests: write runs-on: ubuntu-latest + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0 with: diff --git a/.github/workflows/ubuntu_20.yml b/.github/workflows/ubuntu_20.yml index 5d74284b8c16fc..df1450a98e46a1 100644 --- a/.github/workflows/ubuntu_20.yml +++ b/.github/workflows/ubuntu_20.yml @@ -54,6 +54,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -77,7 +78,6 @@ jobs: 
Build: needs: [Docker, Smart_CI] - if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: runner: 'aks-linux-16-cores-32gb' diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index b2a2f78410e9f7..90618357b2c63f 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -56,6 +56,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -80,7 +81,6 @@ jobs: Build: needs: [Docker, Smart_CI] - if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: runner: 'aks-linux-16-cores-32gb' diff --git a/.github/workflows/ubuntu_22_dpcpp.yml b/.github/workflows/ubuntu_22_dpcpp.yml index 1a5c69f03fe690..1ee9df0095ff02 100644 --- a/.github/workflows/ubuntu_22_dpcpp.yml +++ b/.github/workflows/ubuntu_22_dpcpp.yml @@ -44,6 +44,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -66,7 +67,6 @@ jobs: Build: needs: [Docker, Smart_CI] - if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: runner: 'aks-linux-16-cores-32gb' diff --git a/.github/workflows/ubuntu_24.yml b/.github/workflows/ubuntu_24.yml index a0e1b314391b24..474e8a46ae57aa 100644 --- a/.github/workflows/ubuntu_24.yml +++ b/.github/workflows/ubuntu_24.yml @@ -51,6 +51,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -74,7 +75,6 @@ jobs: Build: needs: [Docker, Smart_CI] - if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: runner: 'aks-linux-16-cores-32gb' diff --git 
a/.github/workflows/webassembly.yml b/.github/workflows/webassembly.yml index 902fb0dfcb00f0..a4a1a17af9d5ce 100644 --- a/.github/workflows/webassembly.yml +++ b/.github/workflows/webassembly.yml @@ -48,6 +48,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -88,7 +89,6 @@ jobs: OPENVINO_REPO: /__w/openvino/openvino/openvino OPENVINO_BUILD_DIR: /__w/openvino/openvino/openvino_build SCCACHE_AZURE_KEY_PREFIX: webassembly_Release - if: "!needs.smart_ci.outputs.skip_workflow" steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/workflow_rerunner.yml b/.github/workflows/workflow_rerunner.yml index 81af7aede704ea..1c193f35dfa17e 100644 --- a/.github/workflows/workflow_rerunner.yml +++ b/.github/workflows/workflow_rerunner.yml @@ -21,7 +21,8 @@ permissions: read-all jobs: rerun: name: Rerun Workflow - if: ${{ github.event.workflow_run.conclusion == 'failure' }} # Run only for the failed workflows + # Run only for the failed workflows in openvinotoolkit org + if: ${{ github.event.workflow_run.conclusion == 'failure' && github.repository_owner == 'openvinotoolkit' }} runs-on: aks-linux-2-cores-8gb permissions: actions: write diff --git a/src/inference/src/dev/threading/thread_affinity.cpp b/src/inference/src/dev/threading/thread_affinity.cpp index f53941f270af99..791e5a7fc70f07 100644 --- a/src/inference/src/dev/threading/thread_affinity.cpp +++ b/src/inference/src/dev/threading/thread_affinity.cpp @@ -87,8 +87,9 @@ bool pin_thread_to_vacant_core(int thrIdx, } bool pin_current_thread_to_socket(int socket) { - const int sockets = ov::get_available_numa_nodes().size(); - const int cores = ov::get_number_of_cpu_cores(); + auto proc_type_table = get_org_proc_type_table(); + const int sockets = proc_type_table.size() > 1 ? 
proc_type_table.size() - 1 : 1; + const int cores = proc_type_table[0][MAIN_CORE_PROC]; const int cores_per_socket = cores / sockets; int ncpus = 0; diff --git a/src/inference/src/os/lin/lin_system_conf.cpp b/src/inference/src/os/lin/lin_system_conf.cpp index 2dcf9eaa4d6ad4..e30bcbbe8bc55e 100644 --- a/src/inference/src/os/lin/lin_system_conf.cpp +++ b/src/inference/src/os/lin/lin_system_conf.cpp @@ -12,6 +12,7 @@ #include #include "dev/threading/parallel_custom_arena.hpp" +#include "dev/threading/thread_affinity.hpp" #include "openvino/core/except.hpp" #include "openvino/runtime/system_conf.hpp" #include "os/cpu_map_info.hpp" @@ -114,10 +115,11 @@ CPU::CPU() { }; auto check_valid_cpu = [&]() { - cpu_set_t mask; - CPU_ZERO(&mask); + ov::threading::CpuSet mask; + int ncpus = 0; + std::tie(mask, ncpus) = ov::threading::get_process_mask(); - if ((_processors == 0) || (sched_getaffinity(0, sizeof(cpu_set_t), &mask) == -1)) { + if ((_processors == 0) || mask == nullptr) { return -1; } @@ -128,7 +130,7 @@ CPU::CPU() { numa_node_list.assign(_sockets, std::vector()); for (int i = 0; i < _processors; i++) { - if (CPU_ISSET(i, &mask)) { + if (CPU_ISSET(i, mask)) { valid_cpu_mapping_table.emplace_back(_cpu_mapping_table[i]); if (_cpu_mapping_table[i][CPU_MAP_CORE_TYPE] == MAIN_CORE_PROC) { phy_core_list.emplace_back(_cpu_mapping_table[i][CPU_MAP_CORE_ID]); diff --git a/src/inference/src/system_conf.cpp b/src/inference/src/system_conf.cpp index 6ebec87feccba1..9de1eeb78e1547 100644 --- a/src/inference/src/system_conf.cpp +++ b/src/inference/src/system_conf.cpp @@ -327,26 +327,10 @@ int get_org_numa_id(int numa_node_id) { # ifndef _WIN32 int get_number_of_cpu_cores(bool bigCoresOnly) { CPU& cpu = cpu_info(); - unsigned numberOfProcessors = cpu._processors; unsigned totalNumberOfCpuCores = cpu._cores; OPENVINO_ASSERT(totalNumberOfCpuCores != 0, "Total number of cpu cores can not be 0."); - cpu_set_t usedCoreSet, currentCoreSet, currentCpuSet; - CPU_ZERO(&currentCpuSet); - 
CPU_ZERO(&usedCoreSet); - CPU_ZERO(&currentCoreSet); - - sched_getaffinity(0, sizeof(currentCpuSet), &currentCpuSet); - - for (unsigned processorId = 0u; processorId < numberOfProcessors; processorId++) { - if (CPU_ISSET(processorId, &currentCpuSet)) { - unsigned coreId = processorId % totalNumberOfCpuCores; - if (!CPU_ISSET(coreId, &usedCoreSet)) { - CPU_SET(coreId, &usedCoreSet); - CPU_SET(processorId, &currentCoreSet); - } - } - } - int phys_cores = CPU_COUNT(&currentCoreSet); + + int phys_cores = totalNumberOfCpuCores; # if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO) auto core_types = custom::info::core_types(); if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ { diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp index f3e96e8650b672..570e2057d9b5e5 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp @@ -151,6 +151,32 @@ struct NetworkDescription final { NetworkMetadata metadata; }; +/** + * @struct CompiledNetwork + * @brief Custom container for compiled network, used for export + * @var CompiledNetwork::data + * Pointer to the address of compiled network + * @var CompiledNetwork::size + * Size of the compiled network + * @var CompiledNetwork::ownedStorage + * Plugin owned compiled network storage that is required in case of a driver that + * doesn't support graph extension 1.7, as in this case plugin must create a copy of the compiled network. 
+ * @note It's unsafe to store either data or size outside of the compiled network object as its destructor + * would release the owning container + */ + +struct CompiledNetwork { + const uint8_t* data; + size_t size; + CompiledNetwork(const uint8_t* data, size_t size, std::vector storage) + : data(data), + size(size), + ownedStorage(std::move(storage)) {} + +private: + std::vector ownedStorage; +}; + /** * @interface ICompiler * @brief An interface to be implemented by a concrete compiler to provide @@ -203,8 +229,10 @@ class ICompiler : public std::enable_shared_from_this { // Driver compiler can use this to release graphHandle, if we do not have executor virtual void release([[maybe_unused]] std::shared_ptr networkDescription){}; - virtual std::vector getCompiledNetwork(std::shared_ptr networkDescription) { - return networkDescription->compiledNetwork; + virtual CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) { + return CompiledNetwork(networkDescription.compiledNetwork.data(), + networkDescription.compiledNetwork.size(), + networkDescription.compiledNetwork); } protected: diff --git a/src/plugins/intel_npu/src/backend/include/zero_device.hpp b/src/plugins/intel_npu/src/backend/include/zero_device.hpp index 7453cfc300815e..9d034b1bb4038b 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_device.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_device.hpp @@ -69,6 +69,7 @@ class ZeroDevice : public IDevice { std::map device_gops = {{ov::element::f32, 0.f}, {ov::element::f16, 0.f}, + {ov::element::bf16, 0.f}, {ov::element::u8, 0.f}, {ov::element::i8, 0.f}}; diff --git a/src/plugins/intel_npu/src/backend/include/zero_types.hpp b/src/plugins/intel_npu/src/backend/include/zero_types.hpp index b0dbef843868fe..834d66a45a80d9 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_types.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_types.hpp @@ -16,7 +16,7 @@ /** * @brief Last version of Table of Graph 
Extension functions used within plugin */ -using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_6_t; +using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_7_t; /** * @brief Last version of the Command Queue functions used within plugin */ @@ -155,6 +155,12 @@ struct ze_graph_dditable_ext_decorator final { throwWhenUnsupported("pfnDeviceGetGraphProperties2", ZE_GRAPH_EXT_VERSION_1_6); return _impl->pfnDeviceGetGraphProperties2(hDevice, pDeviceGraphProperties); } + + // version 1.7 + ze_result_t ZE_APICALL pfnGetNativeBinary2(ze_graph_handle_t hGraph, size_t* pSize, uint8_t** pGraphNativeBinary) { + throwWhenUnsupported("pfnGetNativeBinary2", ZE_GRAPH_EXT_VERSION_1_7); + return _impl->pfnGetNativeBinary2(hGraph, pSize, pGraphNativeBinary); + } }; /** diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index ec17b0e137cf25..0a8d8dded5e97d 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -551,6 +551,8 @@ void ZeroInferRequest::check_network_precision(const ov::element::Type_t precisi break; case ov::element::Type_t::f16: break; + case ov::element::Type_t::bf16: + break; case ov::element::Type_t::u4: break; case ov::element::Type_t::i4: @@ -575,7 +577,7 @@ void ZeroInferRequest::check_network_precision(const ov::element::Type_t precisi break; default: OPENVINO_THROW("Unsupported tensor precision: " + ov::element::Type(precision).get_type_name() + - "! Supported precisions: FP32, FP16, U4, I4, U8, I8, U16, I16, U32, I32, U64, I64, FP64"); + "! 
Supported precisions: FP32, FP16, BF16, U4, I4, U8, I8, U16, I16, U32, I32, U64, I64, FP64"); } } diff --git a/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp index 3f02cecd2b0f19..99de755e1c49aa 100644 --- a/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp @@ -36,7 +36,7 @@ class LevelZeroCompilerAdapter final : public ICompiler { void release(std::shared_ptr networkDescription) override; - std::vector getCompiledNetwork(std::shared_ptr networkDescription) override; + CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override; private: /** diff --git a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp index 108b48cf0c6f73..523fc87a7f9dd3 100644 --- a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp +++ b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp @@ -43,6 +43,11 @@ using SerializedIR = std::pair>; (std::is_same::value || std::is_same::value || \ std::is_same::value || std::is_same::value) +#define UseCopyForNativeBinary(T) \ + (std::is_same::value || std::is_same::value || \ + std::is_same::value || std::is_same::value || \ + std::is_same::value) + /** * Adapter to use CiD through ZeroAPI */ @@ -100,7 +105,7 @@ class LevelZeroCompilerInDriver final : public ICompiler { void release(std::shared_ptr networkDescription) override; - std::vector getCompiledNetwork(std::shared_ptr networkDescription) override; + CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override; private: NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const; @@ -123,6 +128,20 @@ class LevelZeroCompilerInDriver final : public ICompiler { std::vector& inputs, std::vector& outputs) const; + 
template = true> + void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, + ze_graph_handle_t graphHandle, + std::vector& blob, + uint8_t*& blobPtr, + size_t& blobSize) const; + + template = true> + void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, + ze_graph_handle_t graphHandle, + std::vector& /* unusedBlob */, + uint8_t*& blobPtr, + size_t& blobSize) const; + template = true> ze_result_t seriazlideIRModelAndQueryNetworkCreateV2(const std::shared_ptr& model, const Config& config, diff --git a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp index ceacd9cda037a5..1f2a23539a99f5 100644 --- a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp @@ -64,6 +64,12 @@ LevelZeroCompilerAdapter::LevelZeroCompilerAdapter(std::shared_ptr>(driverHandle, + deviceHandle, + zeContext, + graph_ddi_table_ext); + break; default: apiAdapter = std::make_shared>(driverHandle, deviceHandle, @@ -109,10 +115,9 @@ void LevelZeroCompilerAdapter::release(std::shared_ptr apiAdapter->release(std::move(networkDescription)); } -std::vector LevelZeroCompilerAdapter::getCompiledNetwork( - std::shared_ptr networkDescription) { +CompiledNetwork LevelZeroCompilerAdapter::getCompiledNetwork(const NetworkDescription& networkDescription) { _logger.info("getCompiledNetwork - using adapter to perform getCompiledNetwork(networkDescription)"); - return apiAdapter->getCompiledNetwork(std::move(networkDescription)); + return apiAdapter->getCompiledNetwork(networkDescription); } } // namespace driverCompilerAdapter diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp index c1398d227820da..0e02bb48f3a4b7 100644 --- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp +++ 
b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp @@ -363,46 +363,83 @@ void LevelZeroCompilerInDriver::release(std::shared_ptr -std::vector LevelZeroCompilerInDriver::getCompiledNetwork( - std::shared_ptr networkDescription) { - if (networkDescription->metadata.graphHandle != nullptr && networkDescription->compiledNetwork.size() == 0) { +template > +void LevelZeroCompilerInDriver::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, + ze_graph_handle_t graphHandle, + std::vector& blob, + uint8_t*& blobPtr, + size_t& blobSize) const { + // Get blob size first + auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr); + blob.resize(blobSize); + + OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS, + "Failed to compile network. L0 pfnGetNativeBinary get blob size", + " result: ", + ze_result_to_string(result), + ", code 0x", + std::hex, + uint64_t(result), + ". ", + getLatestBuildError()); + + // Get blob data + result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, blob.data()); + + OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS, + "Failed to compile network. L0 pfnGetNativeBinary get blob data", + " result: ", + ze_result_to_string(result), + ", code 0x", + std::hex, + uint64_t(result), + ". ", + getLatestBuildError()); + + blobPtr = blob.data(); +} + +template +template > +void LevelZeroCompilerInDriver::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, + ze_graph_handle_t graphHandle, + std::vector& /* unusedBlob */, + uint8_t*& blobPtr, + size_t& blobSize) const { + // Get blob ptr and size + auto result = _graphDdiTableExt.pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr); + + OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS, + "Failed to compile network. L0 pfnGetNativeBinary get blob size", + " result: ", + ze_result_to_string(result), + ", code 0x", + std::hex, + uint64_t(result), + ". 
", + getLatestBuildError()); +} + +template +CompiledNetwork LevelZeroCompilerInDriver::getCompiledNetwork( + const NetworkDescription& networkDescription) { + if (networkDescription.metadata.graphHandle != nullptr && networkDescription.compiledNetwork.size() == 0) { _logger.info("LevelZeroCompilerInDriver getCompiledNetwork get blob from graphHandle"); - ze_graph_handle_t graphHandle = static_cast(networkDescription->metadata.graphHandle); + ze_graph_handle_t graphHandle = static_cast(networkDescription.metadata.graphHandle); - // Get blob size first + uint8_t* blobPtr = nullptr; size_t blobSize = -1; + std::vector blob; + + getNativeBinary(_graphDdiTableExt, graphHandle, blob, blobPtr, blobSize); - auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr); - - OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS, - "Failed to compile network. L0 pfnGetNativeBinary get blob size", - " result: ", - ze_result_to_string(result), - ", code 0x", - std::hex, - uint64_t(result), - ". ", - getLatestBuildError()); - - std::vector blob(blobSize); - // Get blob data - result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, blob.data()); - - OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS, - "Failed to compile network. L0 pfnGetNativeBinary get blob data", - " result: ", - ze_result_to_string(result), - ", code 0x", - std::hex, - uint64_t(result), - ". 
", - getLatestBuildError()); _logger.info("LevelZeroCompilerInDriver getCompiledNetwork returning blob"); - return blob; - } else { - _logger.info("return the blob from network description"); - return networkDescription->compiledNetwork; + return CompiledNetwork(blobPtr, blobSize, std::move(blob)); } + _logger.info("return the blob from network description"); + return CompiledNetwork(networkDescription.compiledNetwork.data(), + networkDescription.compiledNetwork.size(), + networkDescription.compiledNetwork); } template @@ -1201,6 +1238,7 @@ template class LevelZeroCompilerInDriver; template class LevelZeroCompilerInDriver; template class LevelZeroCompilerInDriver; template class LevelZeroCompilerInDriver; +template class LevelZeroCompilerInDriver; } // namespace driverCompilerAdapter } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp index 9acdb396293f3c..6a9cf017fded81 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp @@ -140,10 +140,8 @@ std::vector getIsolates(const std::string isolates_unparsed) { if (!isolates.empty()) { LOG_INFO("Online partitioning will isolate subgraphs containing specified patterns."); } else { - LOG_WARN("Incorect pattern in NPUW_ONLINE_ISOLATE!" - << " Please, follow the example: " - << "Op:Select/NPU,P:DQMatMulGQ/compute,P:DQMatMulCW/compute,P:RMSNorm/compute. " - << "No isolate rules will be taken into account during partitioning!"); + LOG_WARN("Incorect pattern in NPUW_ONLINE_ISOLATE! 
No isolate rules will be taken into account during " + "partitioning!"); } return isolates; @@ -193,7 +191,8 @@ std::vector getNoFolds(const std::string& nofolds_unparsed) { void setComputeConfig(PassContext& ctx) { // FIXME: initialize via a dedicated function instead of parsing - ctx.isolates = detail::getIsolates("P:DQMatMulGQ/compute,P:DQMatMulCW/compute,P:RMSNorm/compute"); + ctx.isolates = detail::getIsolates("P:DQMatMulGQu4/compute,P:DQMatMulCWu4/compute,P:DQMatMulGQi4/" + "compute,P:DQMatMulCWi4/compute,P:RMSNorm/compute"); ctx.nofolds = detail::getNoFolds("compute"); } diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp index 2ee36fcb09361a..a35f33eab49178 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp @@ -404,14 +404,21 @@ void Snapshot::earlyRegroup() { if (isolate.pattern == "RMSNorm") { rewr.add_matcher(shared_from_this(), isolate.tag); handle_patterns = true; - } else if (isolate.pattern == "DQMatMulCW") { - rewr.add_matcher(shared_from_this(), isolate.tag); + } else if (isolate.pattern == "DQMatMulCWu4") { + rewr.add_matcher(shared_from_this(), isolate.tag); handle_patterns = true; - } else if (isolate.pattern == "DQMatMulGQ") { - rewr.add_matcher(shared_from_this(), isolate.tag); + } else if (isolate.pattern == "DQMatMulGQu4") { + rewr.add_matcher(shared_from_this(), isolate.tag); + handle_patterns = true; + } else if (isolate.pattern == "DQMatMulCWi4") { + rewr.add_matcher(shared_from_this(), isolate.tag); + handle_patterns = true; + } else if (isolate.pattern == "DQMatMulGQi4") { + rewr.add_matcher(shared_from_this(), isolate.tag); handle_patterns = true; } else { - LOG_WARN("OPENVINO_NPUW_ISOLATE only supports RMSNorm, DQMatMulCW, DQMatMulGQ " + LOG_WARN("OPENVINO_NPUW_ISOLATE only supports RMSNorm, DQMatMulCWu4, DQMatMulGQu4, 
DQMatMulCWi4, " + "DQMatMulGQi4 " << "as patterns. Isolate pattern " << isolate.pattern << " is skipped!"); } } diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp index d43fc8d95c3ae8..e7f09b00cde2a2 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp @@ -7,24 +7,7 @@ #include "../../logging.hpp" #include "../online/group.hpp" // online::Group #include "../online/snapshot.hpp" // online::Snapshot -#include "openvino/op/add.hpp" -#include "openvino/op/broadcast.hpp" -#include "openvino/op/concat.hpp" -#include "openvino/op/convert.hpp" -#include "openvino/op/divide.hpp" -#include "openvino/op/gather.hpp" -#include "openvino/op/greater.hpp" -#include "openvino/op/matmul.hpp" -#include "openvino/op/mod.hpp" -#include "openvino/op/multiply.hpp" -#include "openvino/op/power.hpp" -#include "openvino/op/reduce_mean.hpp" -#include "openvino/op/reshape.hpp" -#include "openvino/op/shape_of.hpp" -#include "openvino/op/sqrt.hpp" -#include "openvino/op/subtract.hpp" -#include "openvino/op/util/op_types.hpp" -#include "openvino/op/variadic_split.hpp" +#include "openvino/op/ops.hpp" #include "openvino/pass/pattern/op/label.hpp" // any_input #include "openvino/pass/pattern/op/wrap_type.hpp" #include "openvino/util/common_util.hpp" @@ -37,7 +20,7 @@ namespace compute { namespace opp = ov::pass::pattern; // TODO: visualize -DQMatMulGQ::DQMatMulGQ(const std::shared_ptr& snapshot, const std::string& isol_tag) { +DQMatMulGQu4::DQMatMulGQu4(const std::shared_ptr& snapshot, const std::string& isol_tag) { auto qweight = opp::wrap_type(); auto qzerop = opp::wrap_type(); auto qcoeff = opp::wrap_type(); @@ -87,11 +70,11 @@ DQMatMulGQ::DQMatMulGQ(const std::shared_ptr& snapsh return false; // root hasn't changed }; - register_matcher(std::make_shared(qmm, "TagDQMatMulGQ"), 
std::move(callback)); + register_matcher(std::make_shared(qmm, "TagDQMatMulGQu4"), std::move(callback)); } // TODO: visualize -DQMatMulCW::DQMatMulCW(const std::shared_ptr& snapshot, const std::string& isol_tag) { +DQMatMulCWu4::DQMatMulCWu4(const std::shared_ptr& snapshot, const std::string& isol_tag) { auto qweight = opp::wrap_type(); auto qzerop = opp::wrap_type(); auto qcoeff = opp::wrap_type(); @@ -140,7 +123,99 @@ DQMatMulCW::DQMatMulCW(const std::shared_ptr& snapsh return false; // root hasn't changed }; - register_matcher(std::make_shared(qmm, "TagDQMatMulCW"), std::move(callback)); + register_matcher(std::make_shared(qmm, "TagDQMatMulCWu4"), std::move(callback)); +} + +// TODO: visualize +DQMatMulGQi4::DQMatMulGQi4(const std::shared_ptr& snapshot, const std::string& isol_tag) { + auto qweight = opp::wrap_type(); + auto qcoeff = opp::wrap_type(); + + auto qcvtw = opp::wrap_type({qweight}); + + auto qmuls = opp::wrap_type({qcvtw, qcoeff}); + auto qreshp = opp::wrap_type({qmuls, opp::any_input()}); + auto qcvtr = opp::wrap_type({qreshp}); + auto qmm = opp::wrap_type({opp::any_input(), qcvtr}); + + auto node_to_gptr = snapshot->getNodeToGroupMap(); + + // Note: Use [=] to make sure the above objects stay alive in the callback + auto callback = [=](ov::pass::pattern::Matcher& m) { + auto& node_to_output = m.get_pattern_value_map(); + + auto matched_node_qweight = node_to_output.at(qweight).get_node_shared_ptr(); + auto matched_node_qcoeff = node_to_output.at(qcoeff).get_node_shared_ptr(); + + NPUW_ASSERT(ov::op::util::is_constant(matched_node_qweight)); + NPUW_ASSERT(ov::op::util::is_constant(matched_node_qcoeff)); + + auto matched_qweight = std::static_pointer_cast(matched_node_qweight); + auto matched_qcoeff = std::static_pointer_cast(matched_node_qcoeff); + + if ((ov::element::i4 == matched_qweight->get_element_type() || + ov::element::i8 == matched_qweight->get_element_type()) && + ov::element::f16 == matched_qcoeff->get_element_type()) { + // Partitioning 
ignores Const->Convert nodes, so qcvtw is not used + auto matched_qmuls = node_to_output.at(qmuls).get_node_shared_ptr(); + auto matched_qreshp = node_to_output.at(qreshp).get_node_shared_ptr(); + auto matched_qcvtr = node_to_output.at(qcvtr).get_node_shared_ptr(); + auto matched_qmm = node_to_output.at(qmm).get_node_shared_ptr(); + + node_to_gptr->at(matched_qmuls)->isolate(isol_tag); + node_to_gptr->at(matched_qreshp)->isolate(isol_tag); + node_to_gptr->at(matched_qcvtr)->isolate(isol_tag); + node_to_gptr->at(matched_qmm)->isolate(isol_tag); + } + + return false; // root hasn't changed + }; + register_matcher(std::make_shared(qmm, "TagDQMatMulGQi4"), std::move(callback)); +} + +// TODO: visualize +DQMatMulCWi4::DQMatMulCWi4(const std::shared_ptr& snapshot, const std::string& isol_tag) { + auto qweight = opp::wrap_type(); + auto qcoeff = opp::wrap_type(); + + auto qcvtw = opp::wrap_type({qweight}); + + auto qmuls = opp::wrap_type({qcvtw, qcoeff}); + + auto qcvtm = opp::wrap_type({qmuls}); + auto qmm = opp::wrap_type({opp::any_input(), qcvtm}); + + auto node_to_gptr = snapshot->getNodeToGroupMap(); + + // Note: Use [=] to make sure the above objects stay alive in the callback + auto callback = [=](ov::pass::pattern::Matcher& m) { + auto& node_to_output = m.get_pattern_value_map(); + + auto matched_node_qweight = node_to_output.at(qweight).get_node_shared_ptr(); + auto matched_node_qcoeff = node_to_output.at(qcoeff).get_node_shared_ptr(); + + NPUW_ASSERT(ov::op::util::is_constant(matched_node_qweight)); + NPUW_ASSERT(ov::op::util::is_constant(matched_node_qcoeff)); + + auto matched_qweight = std::static_pointer_cast(matched_node_qweight); + auto matched_qcoeff = std::static_pointer_cast(matched_node_qcoeff); + + if ((ov::element::i4 == matched_qweight->get_element_type() || + ov::element::i8 == matched_qweight->get_element_type()) && + ov::element::f16 == matched_qcoeff->get_element_type()) { + // Partitioning ignores Const->Convert nodes, so qcvtw is not used + 
auto matched_qmuls = node_to_output.at(qmuls).get_node_shared_ptr(); + auto matched_qcvtm = node_to_output.at(qcvtm).get_node_shared_ptr(); + auto matched_qmm = node_to_output.at(qmm).get_node_shared_ptr(); + + node_to_gptr->at(matched_qmuls)->isolate(isol_tag); + node_to_gptr->at(matched_qcvtm)->isolate(isol_tag); + node_to_gptr->at(matched_qmm)->isolate(isol_tag); + } + + return false; // root hasn't changed + }; + register_matcher(std::make_shared(qmm, "TagDQMatMulCWi4"), std::move(callback)); } // TODO: visualize diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp index 80aa4d095d3c9f..92e60cb95fbdbe 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp @@ -21,14 +21,24 @@ class Snapshot; // Forward declaration namespace patterns { namespace compute { -class DQMatMulGQ : public ov::pass::MatcherPass { +class DQMatMulGQu4 : public ov::pass::MatcherPass { public: - DQMatMulGQ(const std::shared_ptr& snapshot, const std::string& isol_tag); + DQMatMulGQu4(const std::shared_ptr& snapshot, const std::string& isol_tag); }; -class DQMatMulCW : public ov::pass::MatcherPass { +class DQMatMulCWu4 : public ov::pass::MatcherPass { public: - DQMatMulCW(const std::shared_ptr& snapshot, const std::string& isol_tag); + DQMatMulCWu4(const std::shared_ptr& snapshot, const std::string& isol_tag); +}; + +class DQMatMulGQi4 : public ov::pass::MatcherPass { +public: + DQMatMulGQi4(const std::shared_ptr& snapshot, const std::string& isol_tag); +}; + +class DQMatMulCWi4 : public ov::pass::MatcherPass { +public: + DQMatMulCWi4(const std::shared_ptr& snapshot, const std::string& isol_tag); }; class RMSNorm : public ov::pass::MatcherPass { diff --git a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp index 
914879feee359f..51ed0e2c5c4858 100644 --- a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp @@ -27,10 +27,11 @@ constexpr std::string_view NO_EXECUTOR_FOR_INFERENCE = "Can't create infer request!\n" "Please make sure that the device is available. Only exports can be made."; -std::uint32_t hash(const std::vector& data) { +std::uint32_t hash(const intel_npu::CompiledNetwork& blob) { std::uint32_t result = 1171117u; - for (const auto& c : data) - result = ((result << 7) + result) + static_cast(c); + for (const uint8_t* it = blob.data; it != blob.data + blob.size; ++it) { + result = ((result << 7) + result) + static_cast(*it); + } return result; } @@ -139,15 +140,17 @@ std::shared_ptr CompiledModel::create_sync_infer_request( void CompiledModel::export_model(std::ostream& stream) const { _logger.debug("CompiledModel::export_model"); - const auto&& blob = _compiler->getCompiledNetwork(_networkPtr); - stream.write(reinterpret_cast(blob.data()), blob.size()); - std::stringstream str; - str << "Blob size: " << blob.size() << ", hash: " << std::hex << hash(blob); - _logger.info(str.str().c_str()); + const auto blob = _compiler->getCompiledNetwork(*_networkPtr); + stream.write(reinterpret_cast(blob.data), blob.size); if (!stream) { _logger.error("Write blob to stream failed. 
Blob is broken!"); } else { + if (_logger.level() >= ov::log::Level::INFO) { + std::stringstream str; + str << "Blob size: " << blob.size << ", hash: " << std::hex << hash(blob); + _logger.info(str.str().c_str()); + } _logger.info("Write blob to stream successfully."); } } diff --git a/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp index c38125a6458e7d..4eb829045c964a 100644 --- a/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -266,7 +266,6 @@ std::vector disabledTestPatterns() { _skipRegistry.addPatterns( "Tests with unsupported precision", { ".*InferRequestCheckTensorPrecision.*type=boolean.*", - ".*InferRequestCheckTensorPrecision.*type=bf16.*", ".*InferRequestCheckTensorPrecision.*type=f64.*", ".*InferRequestCheckTensorPrecision.*type=u1\\D.*", // [Track number: E#97469] diff --git a/src/plugins/intel_npu/thirdparty/level-zero-ext b/src/plugins/intel_npu/thirdparty/level-zero-ext index 16c85231a82ee1..816b5ce120096c 160000 --- a/src/plugins/intel_npu/thirdparty/level-zero-ext +++ b/src/plugins/intel_npu/thirdparty/level-zero-ext @@ -1 +1 @@ -Subproject commit 16c85231a82ee1a0b06ed7ab7da3f411a0878ed7 +Subproject commit 816b5ce120096cbc115b56ed43f8a030eb420b19 diff --git a/src/plugins/intel_npu/tools/single-image-test/main.cpp b/src/plugins/intel_npu/tools/single-image-test/main.cpp index 14fce26bdd7458..3b3009bb5f459c 100644 --- a/src/plugins/intel_npu/tools/single-image-test/main.cpp +++ b/src/plugins/intel_npu/tools/single-image-test/main.cpp @@ -287,7 +287,8 @@ std::vector ovToCV(const ov::Tensor& tensor, const ov::Shape& shape, co "Unsupported layout: ", layout.to_string()); OPENVINO_ASSERT(precision == ov::element::Type_t::u8 || precision == ov::element::Type_t::f32 || - precision == ov::element::Type_t::f16 
|| precision == ov::element::Type_t::i32, + precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::bf16 || + precision == ov::element::Type_t::i32, "Unsupported precision: ", precision.get_type_name()); int cvType = 0; @@ -302,6 +303,9 @@ std::vector ovToCV(const ov::Tensor& tensor, const ov::Shape& shape, co } else if (precision == ov::element::Type_t::f16) { cvType = CV_16SC1; elemSize = sizeof(ov::float16); + } else if (precision == ov::element::Type_t::bf16) { + cvType = CV_16SC1; + elemSize = sizeof(ov::bfloat16); } else if (precision == ov::element::Type_t::i32) { cvType = CV_32SC1; elemSize = sizeof(int32_t); @@ -392,11 +396,14 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha cvType = static_cast(CV_32FC(C)); } else if (precision == ov::element::Type_t::f16) { cvType = static_cast(CV_16SC(C)); + } else if (precision == ov::element::Type_t::bf16) { + cvType = static_cast(CV_16SC(C)); } else if (precision == ov::element::Type_t::i32) { cvType = static_cast(CV_32SC(C)); } else { OPENVINO_ASSERT(precision == ov::element::Type_t::u8 || precision == ov::element::Type_t::f32 || - precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::i32, + precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::bf16 || + precision == ov::element::Type_t::i32, "Unsupported precision ", precision.get_type_name()); } @@ -437,6 +444,10 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha const auto inPtr = in.ptr(); const auto outPtr = out.ptr(); convertBufferType(outPtr, inPtr, out.size().area() * C); + } else if (precision == ov::element::Type_t::bf16) { + const auto inPtr = in.ptr(); + const auto outPtr = out.ptr(); + convertBufferType(outPtr, inPtr, out.size().area() * C); } else if (precision == ov::element::Type_t::i32) { in.convertTo(out, CV_32S); } else { @@ -451,7 +462,8 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha 
} else if (layout == ov::Layout("NCHW")) { auto tensorPlanes = ovToCV(tensor, shape, layout, 0); - if (precision != ov::element::Type_t::f16) { + if (!(precision == ov::element::Type_t::f16 || + precision == ov::element::Type_t::bf16)) { cv::split(in, tensorPlanes); } else { std::vector inPlanes; @@ -461,8 +473,13 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha for (size_t i = 0; i < tensorPlanes.size(); ++i) { const auto inPtr = inPlanes[i].ptr(); - const auto outPtr = tensorPlanes[i].ptr(); - convertBufferType(outPtr, inPtr, inPlanes[i].size().area()); + if (precision == ov::element::Type_t::f16) { + const auto outPtr = tensorPlanes[i].ptr(); + convertBufferType(outPtr, inPtr, inPlanes[i].size().area()); + } else if (precision == ov::element::Type_t::bf16) { + const auto outPtr = tensorPlanes[i].ptr(); + convertBufferType(outPtr, inPtr, inPlanes[i].size().area()); + } } } @@ -1761,6 +1778,8 @@ static int runSingleImageTest() { inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::f32; } else if (strEq(precision, "FP16")) { inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::f16; + } else if (strEq(precision, "BF16")) { + inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::bf16; } else if (strEq(precision, "I32")) { inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::i32; } else if (strEq(precision, "I64")) { @@ -1808,6 +1827,8 @@ static int runSingleImageTest() { ov::element::Type prc_in = ov::element::u8; if (FLAGS_ip == "FP16") prc_in = ov::element::f16; + else if (FLAGS_ip == "BF16") + prc_in = ov::element::bf16; else if (FLAGS_ip == "FP32") prc_in = ov::element::f32; else if (FLAGS_ip == "I32") diff --git a/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py b/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py index 78bd4d526dddb3..ea731d4a7aefb9 100644 --- a/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py 
+++ b/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py @@ -36,7 +36,7 @@ def _infer_pipelines(test_input, preprocess_pipeline, input_channels=3): try: return _infer_pipelines_impl(test_input, preprocess_pipeline, input_channels) except RuntimeError as e: - if "builtin cannot be used as a value" in e: + if "builtin cannot be used as a value" in str(e): # This is a potentially sporadic issue print(f"An error occurred: {e}. Retrying...") retries += 1 diff --git a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py index 2481f1d65ef8fb..a2f54076de9d7f 100644 --- a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py +++ b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py @@ -76,7 +76,7 @@ def _test(self, model, ref_net, kind, ie_device, precision, ir_version, infer_ti try: return self._test_impl(model, ref_net, kind, ie_device, precision, ir_version, infer_timeout, dynamic_shapes, **kwargs) except RuntimeError as e: - if "builtin cannot be used as a value" in e: + if "builtin cannot be used as a value" in str(e): # This is a potentially sporadic issue print(f"An error occurred: {e}. Retrying...") retries += 1