Skip to content

Commit

Permalink
enable numa_node_id and socket_id in streams_info_table (openvinotool…
Browse files Browse the repository at this point in the history
…kit#18164)

* enable numa_node_id and socket_id in streams_info_table

* fix code style issue

* fix document issue

* update for comments

* update for comments

* update for comments
  • Loading branch information
wangleis authored Jul 7, 2023
1 parent e81f85c commit d571ab3
Show file tree
Hide file tree
Showing 8 changed files with 1,350 additions and 1,104 deletions.
32 changes: 20 additions & 12 deletions src/inference/dev_api/threading/ie_cpu_streams_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,37 @@
namespace InferenceEngine {

/**
* @enum column_of_cpu_streams_info_table
* @enum ColumnOfCpuStreamsInfoTable
* @brief This enum contains the definition of each column in the cpu streams information table.
*
* The following are two examples of the cpu streams information table.
* 1. 8 streams on hybrid platform which has 4 threads per stream (TPS).
* 1.1 2 streams (4 TPS) on physical core of Intel Performance-cores
* 1.2 4 streams (4 TPS) on Intel Efficient-cores
* 1.3 2 streams (4 TPS) on logic core of Intel Performance-cores
*
* NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM
* 2 1 4 // 2 streams (4 TPS) on physical core of Intel Performance-cores
* 4 2 4 // 4 streams (4 TPS) on Intel Efficient-cores
* 2 3 4 // 2 streams (4 TPS) on logic core of Intel Performance-cores
* NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM | STREAM_NUMA_NODE_ID | STREAM_SOCKET_ID
* 2 1 4 0 0
* 4 2 4 0 0
* 2 3 4 0 0
*
* 2. 1 stream (10 TPS) on hybrid platform which has 2 threads on physical core and 8 threads on Ecore.
* 2.1 1 stream (10 TPS) on multiple types of processors
* 2.2 2 threads on physical core of Intel Performance-cores
* 2.3 8 threads on Intel Efficient-cores
*
* NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM
* 1 0 10 // 1 streams (10 TPS) on multiple types of processors
* 0 1 2 // 2 threads on physical core of Intel Performance-cores
* 0 2 8 // 8 threads on Intel Efficient-cores
* NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM | STREAM_NUMA_NODE_ID | STREAM_SOCKET_ID
* 1 0 10 0 0
* 0 1 2 0 0
* 0 2 8 0 0
*/
// NOTE(review): this span is a rendered diff. The removed `typedef enum { ... } column_of_cpu_streams_info_table;`
// lines and the added `enum ColumnOfCpuStreamsInfoTable { ... };` lines appear interleaved below.
typedef enum {
enum ColumnOfCpuStreamsInfoTable {
NUMBER_OF_STREAMS = 0, //!< Number of streams on specific CPU core type
PROC_TYPE = 1, //!< Core type of current streams
THREADS_PER_STREAM = 2, //!< Number of threads per stream of current streams
CPU_STREAMS_TABLE_SIZE = 3 //!< Size of streams info table
} column_of_cpu_streams_info_table;
STREAM_NUMA_NODE_ID = 3, //!< Numa node id of processors in this row
STREAM_SOCKET_ID = 4, //!< Socket id of processors in this row
CPU_STREAMS_TABLE_SIZE = 5 //!< Size of streams info table
};

} // namespace InferenceEngine
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ struct Config {
ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE;
bool enableHyperThreading = true;
bool changedHyperThreading = false;
Config::LatencyThreadingMode scopeOflatencyCandidate = Config::LatencyThreadingMode::PER_SOCKET;
Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET;
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
LPTransformsMode lpTransformsMode = LPTransformsMode::On;
#else
Expand Down
246 changes: 178 additions & 68 deletions src/plugins/intel_cpu/src/cpu_streams_calculation.cpp

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/plugins/intel_cpu/src/cpu_streams_calculation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace intel_cpu {
* - input "0" indicates that the function generates the optimal number of threads per stream based on
* processors type information.
* @param[in] input_perf_hint is performance hint set by user via ov::hint::performance_mode or the default value.
* @param[in] scopeOflatencyCandidate is the scope of candidate processors per stream for latency hint
* @param[in] latencyThreadingMode is the scope of candidate processors per stream for latency hint
* - user can select all processors per numa node, per socket, or per platform.
* @param[in] proc_type_table is currently available candidate processors.
* - candidate processors have been updated based on user input hints like ov::hint::scheduling_core_type
Expand All @@ -49,7 +49,7 @@ std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
const int input_infer_requests,
const int model_prefer_threads,
const std::string input_perf_hint,
const Config::LatencyThreadingMode scopeOflatencyCandidate,
const Config::LatencyThreadingMode latencyThreadingMode,
const std::vector<std::vector<int>> proc_type_table);
/**
* @brief Get model_prefer_threads
Expand Down
289 changes: 289 additions & 0 deletions src/plugins/intel_cpu/tests/unit/streams_info/enable_ht_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>
#include <ie_system_conf.h>

#include <common_test_utils/test_common.hpp>

#include "cpu_map_scheduling.hpp"
#include "cpu_streams_calculation.hpp"

using namespace testing;
using namespace InferenceEngine;
using namespace ov;

namespace {

// Parameters and expected results for one apply_hyper_threading() check.
// The field order matters: the cases in this file brace-initialize the struct positionally.
struct UseHTTestCase {
// Passed as the first argument of apply_hyper_threading(); compared with output_ht_value after the call.
bool input_ht_value;
// Passed as the second argument of apply_hyper_threading().
bool input_ht_changed;
// Passed as the third argument; the cases in this file use "LATENCY" or "THROUGHPUT".
std::string input_pm_hint;
// Passed as the fourth argument of apply_hyper_threading().
std::vector<std::vector<int>> proc_type_table;
// Expected return value of apply_hyper_threading().
std::vector<std::vector<int>> result_table;
// Expected value of input_ht_value once the call has returned.
bool output_ht_value;
};

/**
 * Value-parameterized fixture that feeds one UseHTTestCase to
 * ov::intel_cpu::apply_hyper_threading() and checks both the returned table
 * and the hyper-threading flag. All checks run from SetUp().
 */
class UseHTTests : public CommonTestUtils::TestsCommon,
                   public testing::WithParamInterface<std::tuple<UseHTTestCase>> {
public:
    void SetUp() override {
        // Take a private copy of the parameter: input_ht_value is compared against the
        // expected output after the call, so the call is presumably allowed to modify it
        // (NOTE(review): confirm apply_hyper_threading takes its first argument by reference).
        UseHTTestCase params = std::get<0>(GetParam());

        const std::vector<std::vector<int>> actual_table =
            ov::intel_cpu::apply_hyper_threading(params.input_ht_value,
                                                 params.input_ht_changed,
                                                 params.input_pm_hint,
                                                 params.proc_type_table);

        // The returned table must match the expected table exactly.
        ASSERT_EQ(params.result_table, actual_table);
        // The flag handed to the call must now equal the expected output flag.
        ASSERT_EQ(params.input_ht_value, params.output_ht_value);
    }
};

// Cases for the three-row proc_type_table {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}.
// Initializer order follows UseHTTestCase: input_ht_value, input_ht_changed, input_pm_hint,
// proc_type_table, result_table, output_ht_value.
// NOTE(review): the column meanings of proc_type_table are defined outside this file; presumably the
// last column counts hyper-threading processors and the first column is the row total - confirm.
// Whenever the expected output_ht_value is false, result_table has the last column set to 0 and the
// first column reduced by the same amount ("reduced table" below).

// input false, changed true, LATENCY: expects the reduced table and output false.
UseHTTestCase _2sockets_false_latency = {
false,
true,
"LATENCY",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
false,
};

// input false, changed true, THROUGHPUT: expects the reduced table and output false.
UseHTTestCase _2sockets_false_throughput = {
false,
true,
"THROUGHPUT",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
false,
};

// input true, changed true, LATENCY: expects the table unchanged and output true.
UseHTTestCase _2sockets_true_latency = {
true,
true,
"LATENCY",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
true,
};

// input true, changed true, THROUGHPUT: expects the table unchanged and output true.
UseHTTestCase _2sockets_true_throughput = {
true,
true,
"THROUGHPUT",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
true,
};

// input false, changed false, LATENCY: expects the reduced table and output false.
UseHTTestCase _2sockets_default_1_latency = {
false,
false,
"LATENCY",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
false,
};

// input false, changed false, THROUGHPUT: expects the reduced table and output false.
UseHTTestCase _2sockets_default_1_throughput = {
false,
false,
"THROUGHPUT",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
false,
};

// input true, changed false, LATENCY: expects the reduced table and output false.
UseHTTestCase _2sockets_default_2_latency = {
true,
false,
"LATENCY",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
false,
};

// input true, changed false, THROUGHPUT: expects the reduced table and output false.
UseHTTestCase _2sockets_default_2_throughput = {
true,
false,
"THROUGHPUT",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
false,
};

// Cases for the single-row proc_type_table {{20, 6, 8, 6}}.
// Initializer order follows UseHTTestCase: input_ht_value, input_ht_changed, input_pm_hint,
// proc_type_table, result_table, output_ht_value.
// "Reduced table" below means {{14, 6, 8, 0}}: last column set to 0, first column lowered by the same amount.
// Unlike the three-row cases in this file, the changed == false THROUGHPUT cases here expect the
// table unchanged and output true.

// input false, changed true, LATENCY: expects the reduced table and output false.
UseHTTestCase _1sockets_1_false_latency = {
false,
true,
"LATENCY",
{{20, 6, 8, 6}},
{{14, 6, 8, 0}},
false,
};

// input false, changed true, THROUGHPUT: expects the reduced table and output false.
UseHTTestCase _1sockets_1_false_throughput = {
false,
true,
"THROUGHPUT",
{{20, 6, 8, 6}},
{{14, 6, 8, 0}},
false,
};

// input true, changed true, LATENCY: expects the table unchanged and output true.
UseHTTestCase _1sockets_1_true_latency = {
true,
true,
"LATENCY",
{{20, 6, 8, 6}},
{{20, 6, 8, 6}},
true,
};

// input true, changed true, THROUGHPUT: expects the table unchanged and output true.
UseHTTestCase _1sockets_1_true_throughput = {
true,
true,
"THROUGHPUT",
{{20, 6, 8, 6}},
{{20, 6, 8, 6}},
true,
};

// input false, changed false, LATENCY: expects the reduced table and output false.
UseHTTestCase _1sockets_1_default_1_latency = {
false,
false,
"LATENCY",
{{20, 6, 8, 6}},
{{14, 6, 8, 0}},
false,
};

// input false, changed false, THROUGHPUT: expects the table unchanged and output true.
UseHTTestCase _1sockets_1_default_1_throughput = {
false,
false,
"THROUGHPUT",
{{20, 6, 8, 6}},
{{20, 6, 8, 6}},
true,
};

// input true, changed false, LATENCY: expects the reduced table and output false.
UseHTTestCase _1sockets_1_default_2_latency = {
true,
false,
"LATENCY",
{{20, 6, 8, 6}},
{{14, 6, 8, 0}},
false,
};

// input true, changed false, THROUGHPUT: expects the table unchanged and output true.
UseHTTestCase _1sockets_1_default_2_throughput = {
true,
false,
"THROUGHPUT",
{{20, 6, 8, 6}},
{{20, 6, 8, 6}},
true,
};

// Cases for the single-row proc_type_table {{12, 6, 0, 6}}.
// Initializer order follows UseHTTestCase: input_ht_value, input_ht_changed, input_pm_hint,
// proc_type_table, result_table, output_ht_value.
// "Reduced table" below means {{6, 6, 0, 0}}: last column set to 0, first column lowered by the same amount.
// As with the other single-row cases in this file, the changed == false THROUGHPUT cases expect the
// table unchanged and output true.

// input false, changed true, LATENCY: expects the reduced table and output false.
UseHTTestCase _1sockets_2_false_latency = {
false,
true,
"LATENCY",
{{12, 6, 0, 6}},
{{6, 6, 0, 0}},
false,
};

// input false, changed true, THROUGHPUT: expects the reduced table and output false.
UseHTTestCase _1sockets_2_false_throughput = {
false,
true,
"THROUGHPUT",
{{12, 6, 0, 6}},
{{6, 6, 0, 0}},
false,
};

// input true, changed true, LATENCY: expects the table unchanged and output true.
UseHTTestCase _1sockets_2_true_latency = {
true,
true,
"LATENCY",
{{12, 6, 0, 6}},
{{12, 6, 0, 6}},
true,
};

// input true, changed true, THROUGHPUT: expects the table unchanged and output true.
UseHTTestCase _1sockets_2_true_throughput = {
true,
true,
"THROUGHPUT",
{{12, 6, 0, 6}},
{{12, 6, 0, 6}},
true,
};

// input false, changed false, LATENCY: expects the reduced table and output false.
UseHTTestCase _1sockets_2_default_1_latency = {
false,
false,
"LATENCY",
{{12, 6, 0, 6}},
{{6, 6, 0, 0}},
false,
};

// input false, changed false, THROUGHPUT: expects the table unchanged and output true.
UseHTTestCase _1sockets_2_default_1_throughput = {
false,
false,
"THROUGHPUT",
{{12, 6, 0, 6}},
{{12, 6, 0, 6}},
true,
};

// input true, changed false, LATENCY: expects the reduced table and output false.
UseHTTestCase _1sockets_2_default_2_latency = {
true,
false,
"LATENCY",
{{12, 6, 0, 6}},
{{6, 6, 0, 0}},
false,
};

// input true, changed false, THROUGHPUT: expects the table unchanged and output true.
UseHTTestCase _1sockets_2_default_2_throughput = {
true,
false,
"THROUGHPUT",
{{12, 6, 0, 6}},
{{12, 6, 0, 6}},
true,
};

// The test body is empty: the call under test and both assertions live in UseHTTests::SetUp().
TEST_P(UseHTTests, UseHT) {}

// Instantiates the UseHT test once per case: the twelve LATENCY cases first, then the twelve
// THROUGHPUT cases. The position in this list determines each instance's parameter index.
INSTANTIATE_TEST_SUITE_P(UseHTTable,
UseHTTests,
testing::Values(_2sockets_false_latency,
_2sockets_true_latency,
_2sockets_default_1_latency,
_2sockets_default_2_latency,
_1sockets_1_false_latency,
_1sockets_1_true_latency,
_1sockets_1_default_1_latency,
_1sockets_1_default_2_latency,
_1sockets_2_false_latency,
_1sockets_2_true_latency,
_1sockets_2_default_1_latency,
_1sockets_2_default_2_latency,
_2sockets_false_throughput,
_2sockets_true_throughput,
_2sockets_default_1_throughput,
_2sockets_default_2_throughput,
_1sockets_1_false_throughput,
_1sockets_1_true_throughput,
_1sockets_1_default_1_throughput,
_1sockets_1_default_2_throughput,
_1sockets_2_false_throughput,
_1sockets_2_true_throughput,
_1sockets_2_default_1_throughput,
_1sockets_2_default_2_throughput));

} // namespace
Loading

0 comments on commit d571ab3

Please sign in to comment.