enable numa_node_id and socket_id in streams_info_table (openvinotool…

…kit#18164) * enable numa_node_id and socket_id in streams_info_table * fix code style issue * fix document issue * update for comments * update for comments * update for comments
sunxiaoxia2022 · Jul 7, 2023 · d571ab3 · d571ab3
1 parent e81f85c
commit d571ab3
Show file tree

Hide file tree

Showing 8 changed files with 1,350 additions and 1,104 deletions.
diff --git a/src/inference/dev_api/threading/ie_cpu_streams_info.hpp b/src/inference/dev_api/threading/ie_cpu_streams_info.hpp
@@ -12,29 +12,37 @@
 namespace InferenceEngine {
 
 /**
- * @enum       column_of_cpu_streams_info_table
+ * @enum       ColumnOfCpuStreamsInfoTable
  * @brief      This enum contains definition of each columns in cpu streams information table.
  *
  * The following are two examples of the cpu streams information table.
  *  1. 8 streams on hybrid platform which has 4 threads per stream (TPS).
+ *     1.1 2 streams (4 TPS) on physical core of Intel Performance-cores
+ *     1.2 4 streams (4 TPS) on Intel Efficient-cores
+ *     1.3 2 streams (4 TPS) on logic core of Intel Performance-cores
  *
- *  NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM
- *          2               1                4          // 2 streams (4 TPS) on physical core of Intel Performance-cores
- *          4               2                4          // 4 streams (4 TPS) on Intel Efficient-cores
- *          2               3                4          // 2 streams (4 TPS) on logic core of Intel Performance-cores
+ *  NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM | STREAM_NUMA_NODE_ID | STREAM_SOCKET_ID
+ *          2               1                4                    0                    0
+ *          4               2                4                    0                    0
+ *          2               3                4                    0                    0
  *
  * 2. 1 stream (10 TPS) on hybrid platform which has 2 threads on physical core and 8 threads on Ecore.
+ *    2.1 1 stream (10 TPS) on multiple types of processors
+ *    2.2 2 threads on physical core of Intel Performance-cores
+ *    2.3 8 threads on Intel Efficient-cores
  *
- *  NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM
- *          1               0               10          // 1 streams (10 TPS) on multiple types of processors
- *          0               1                2          // 2 threads on physical core of Intel Performance-cores
- *          0               2                8          // 8 threads on Intel Efficient-cores
+ *  NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM | STREAM_NUMA_NODE_ID | STREAM_SOCKET_ID
+ *          1               0               10                    0                    0
+ *          0               1                2                    0                    0
+ *          0               2                8                    0                    0
  */
-typedef enum {
+enum ColumnOfCpuStreamsInfoTable {
     NUMBER_OF_STREAMS = 0,      //!< Number of streams on specific CPU core type
     PROC_TYPE = 1,              //!< Core type of current streams
     THREADS_PER_STREAM = 2,     //!< Number of threads per stream of current streams
-    CPU_STREAMS_TABLE_SIZE = 3  //!< Size of streams info table
-} column_of_cpu_streams_info_table;
+    STREAM_NUMA_NODE_ID = 3,    //!< Numa node id of processors in this row
+    STREAM_SOCKET_ID = 4,       //!< Socket id of processors in this row
+    CPU_STREAMS_TABLE_SIZE = 5  //!< Size of streams info table
+};
 
 }  // namespace InferenceEngine
diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
@@ -64,7 +64,7 @@ struct Config {
     ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE;
     bool enableHyperThreading = true;
     bool changedHyperThreading = false;
-    Config::LatencyThreadingMode scopeOflatencyCandidate = Config::LatencyThreadingMode::PER_SOCKET;
+    Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET;
 #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
     LPTransformsMode lpTransformsMode = LPTransformsMode::On;
 #else

diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp
@@ -36,7 +36,7 @@ namespace intel_cpu {
  *               - input "0" indicates that the function generates the optimal number of threads per stream based on
  * processors type information.
  * @param[in]  input_perf_hint is performance hint set by user via ov::hint::performance_mode or the default value.
- * @param[in]  scopeOflatencyCandidate is the scope of candidate processors per stream for latency hint
+ * @param[in]  latencyThreadingMode is the scope of candidate processors per stream for latency hint
  *               - user can select all processors per numa node, per socket, or per platform.
  * @param[in]  proc_type_table is currently available candidate processors.
  *               - candidate processors have been updated based on user input hints like ov::hint::scheduling_core_type
@@ -49,7 +49,7 @@ std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
                                                      const int input_infer_requests,
                                                      const int model_prefer_threads,
                                                      const std::string input_perf_hint,
-                                                     const Config::LatencyThreadingMode scopeOflatencyCandidate,
+                                                     const Config::LatencyThreadingMode latencyThreadingMode,
                                                      const std::vector<std::vector<int>> proc_type_table);
 /**
  * @brief      Get model_prefer_threads

diff --git a/src/plugins/intel_cpu/tests/unit/streams_info/enable_ht_test.cpp b/src/plugins/intel_cpu/tests/unit/streams_info/enable_ht_test.cpp
@@ -0,0 +1,289 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <ie_system_conf.h>
+
+#include <common_test_utils/test_common.hpp>
+
+#include "cpu_map_scheduling.hpp"
+#include "cpu_streams_calculation.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+using namespace ov;
+
+namespace {
+
+struct UseHTTestCase {  // One parameter set for UseHTTests: inputs to apply_hyper_threading() plus expected results
+    bool input_ht_value;                            //!< 1st argument of apply_hyper_threading(); compared with output_ht_value after the call
+    bool input_ht_changed;                          //!< 2nd argument of apply_hyper_threading()
+    std::string input_pm_hint;                      //!< 3rd argument of apply_hyper_threading(); the cases in this file use "LATENCY" or "THROUGHPUT"
+    std::vector<std::vector<int>> proc_type_table;  //!< 4th argument of apply_hyper_threading()
+    std::vector<std::vector<int>> result_table;     //!< Expected return value of apply_hyper_threading()
+    bool output_ht_value;                           //!< Expected value of input_ht_value once the call has returned
+};
+
+class UseHTTests : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<std::tuple<UseHTTestCase>> {
+public:
+    void SetUp() override {  // Performs the whole check; the TEST_P(UseHTTests, UseHT) body in this file is empty
+        auto test_data = std::get<0>(GetParam());  // local copy of the current UseHTTestCase
+
+        std::vector<std::vector<int>> test_result_table =
+            ov::intel_cpu::apply_hyper_threading(test_data.input_ht_value,
+                                                 test_data.input_ht_changed,
+                                                 test_data.input_pm_hint,
+                                                 test_data.proc_type_table);
+
+        ASSERT_EQ(test_data.result_table, test_result_table);  // returned table must equal the expected table
+        ASSERT_EQ(test_data.input_ht_value, test_data.output_ht_value);  // NOTE(review): only meaningful if apply_hyper_threading() takes its 1st argument by non-const reference and updates it - confirm against its declaration
+    }
+};
+
+UseHTTestCase _2sockets_false_latency = {  // positional aggregate init; values follow the field order of UseHTTestCase
+    false,      // input_ht_value
+    true,       // input_ht_changed
+    "LATENCY",  // input_pm_hint
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},  // proc_type_table
+    {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},        // result_table (expected)
+    false,      // output_ht_value (expected)
+};
+
+UseHTTestCase _2sockets_false_throughput = {
+    false,
+    true,
+    "THROUGHPUT",
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
+    false,
+};
+
+UseHTTestCase _2sockets_true_latency = {
+    true,
+    true,
+    "LATENCY",
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    true,
+};
+
+UseHTTestCase _2sockets_true_throughput = {
+    true,
+    true,
+    "THROUGHPUT",
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    true,
+};
+
+UseHTTestCase _2sockets_default_1_latency = {
+    false,
+    false,
+    "LATENCY",
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
+    false,
+};
+
+UseHTTestCase _2sockets_default_1_throughput = {
+    false,
+    false,
+    "THROUGHPUT",
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
+    false,
+};
+
+UseHTTestCase _2sockets_default_2_latency = {
+    true,
+    false,
+    "LATENCY",
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
+    false,
+};
+
+UseHTTestCase _2sockets_default_2_throughput = {
+    true,
+    false,
+    "THROUGHPUT",
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
+    false,
+};
+
+UseHTTestCase _1sockets_1_false_latency = {
+    false,
+    true,
+    "LATENCY",
+    {{20, 6, 8, 6}},
+    {{14, 6, 8, 0}},
+    false,
+};
+
+UseHTTestCase _1sockets_1_false_throughput = {
+    false,
+    true,
+    "THROUGHPUT",
+    {{20, 6, 8, 6}},
+    {{14, 6, 8, 0}},
+    false,
+};
+
+UseHTTestCase _1sockets_1_true_latency = {
+    true,
+    true,
+    "LATENCY",
+    {{20, 6, 8, 6}},
+    {{20, 6, 8, 6}},
+    true,
+};
+
+UseHTTestCase _1sockets_1_true_throughput = {
+    true,
+    true,
+    "THROUGHPUT",
+    {{20, 6, 8, 6}},
+    {{20, 6, 8, 6}},
+    true,
+};
+
+UseHTTestCase _1sockets_1_default_1_latency = {
+    false,
+    false,
+    "LATENCY",
+    {{20, 6, 8, 6}},
+    {{14, 6, 8, 0}},
+    false,
+};
+
+UseHTTestCase _1sockets_1_default_1_throughput = {
+    false,
+    false,
+    "THROUGHPUT",
+    {{20, 6, 8, 6}},
+    {{20, 6, 8, 6}},
+    true,
+};
+
+UseHTTestCase _1sockets_1_default_2_latency = {
+    true,
+    false,
+    "LATENCY",
+    {{20, 6, 8, 6}},
+    {{14, 6, 8, 0}},
+    false,
+};
+
+UseHTTestCase _1sockets_1_default_2_throughput = {
+    true,
+    false,
+    "THROUGHPUT",
+    {{20, 6, 8, 6}},
+    {{20, 6, 8, 6}},
+    true,
+};
+
+UseHTTestCase _1sockets_2_false_latency = {
+    false,
+    true,
+    "LATENCY",
+    {{12, 6, 0, 6}},
+    {{6, 6, 0, 0}},
+    false,
+};
+
+UseHTTestCase _1sockets_2_false_throughput = {
+    false,
+    true,
+    "THROUGHPUT",
+    {{12, 6, 0, 6}},
+    {{6, 6, 0, 0}},
+    false,
+};
+
+UseHTTestCase _1sockets_2_true_latency = {
+    true,
+    true,
+    "LATENCY",
+    {{12, 6, 0, 6}},
+    {{12, 6, 0, 6}},
+    true,
+};
+
+UseHTTestCase _1sockets_2_true_throughput = {
+    true,
+    true,
+    "THROUGHPUT",
+    {{12, 6, 0, 6}},
+    {{12, 6, 0, 6}},
+    true,
+};
+
+UseHTTestCase _1sockets_2_default_1_latency = {
+    false,
+    false,
+    "LATENCY",
+    {{12, 6, 0, 6}},
+    {{6, 6, 0, 0}},
+    false,
+};
+
+UseHTTestCase _1sockets_2_default_1_throughput = {
+    false,
+    false,
+    "THROUGHPUT",
+    {{12, 6, 0, 6}},
+    {{12, 6, 0, 6}},
+    true,
+};
+
+UseHTTestCase _1sockets_2_default_2_latency = {
+    true,
+    false,
+    "LATENCY",
+    {{12, 6, 0, 6}},
+    {{6, 6, 0, 0}},
+    false,
+};
+
+UseHTTestCase _1sockets_2_default_2_throughput = {
+    true,
+    false,
+    "THROUGHPUT",
+    {{12, 6, 0, 6}},
+    {{12, 6, 0, 6}},
+    true,
+};
+
+TEST_P(UseHTTests, UseHT) {}  // empty body: the assertions for each parameter run in UseHTTests::SetUp()
+
+INSTANTIATE_TEST_SUITE_P(UseHTTable,
+                         UseHTTests,
+                         testing::Values(_2sockets_false_latency,
+                                         _2sockets_true_latency,
+                                         _2sockets_default_1_latency,
+                                         _2sockets_default_2_latency,
+                                         _1sockets_1_false_latency,
+                                         _1sockets_1_true_latency,
+                                         _1sockets_1_default_1_latency,
+                                         _1sockets_1_default_2_latency,
+                                         _1sockets_2_false_latency,
+                                         _1sockets_2_true_latency,
+                                         _1sockets_2_default_1_latency,
+                                         _1sockets_2_default_2_latency,
+                                         _2sockets_false_throughput,
+                                         _2sockets_true_throughput,
+                                         _2sockets_default_1_throughput,
+                                         _2sockets_default_2_throughput,
+                                         _1sockets_1_false_throughput,
+                                         _1sockets_1_true_throughput,
+                                         _1sockets_1_default_1_throughput,
+                                         _1sockets_1_default_2_throughput,
+                                         _1sockets_2_false_throughput,
+                                         _1sockets_2_true_throughput,
+                                         _1sockets_2_default_1_throughput,
+                                         _1sockets_2_default_2_throughput));
+
+}  // namespace