Skip to content

Commit

Permalink
set rpc_polling_time from session options and run options
Browse files Browse the repository at this point in the history
  • Loading branch information
HectorSVC committed Oct 5, 2024
1 parent 3a4249d commit 4e01853
Show file tree
Hide file tree
Showing 7 changed files with 76 additions and 32 deletions.
1 change: 1 addition & 0 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -3630,6 +3630,7 @@ struct OrtApi {
* "profiling_level": QNN profiling level, options: "off", "basic", "detailed". Default to off.
* "profiling_file_path": QNN profiling file path if ETW not enabled.
* "rpc_control_latency": QNN RPC control latency.
* "rpc_polling_time": QNN RPC polling time. Only supported by HTP v69 or higher. Values above the QNN maximum are clamped to that maximum.
* "vtcm_mb": QNN VTCM size in MB. default to 0(not set).
* "htp_performance_mode": QNN performance mode, options: "burst", "balanced", "default", "high_performance",
* "high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver", "power_saver", "sustained_high_performance". Default to "default".
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ static const char* const kOrtRunOptionsConfigQnnPerfModePostRun = "qnn.htp_perf_
// Set RPC control latency for QNN HTP backend
static const char* const kOrtRunOptionsConfigQnnRpcControlLatency = "qnn.rpc_control_latency";

// Set RPC polling time for QNN HTP backend, only supported by v69 or higher
static const char* const kOrtRunOptionsConfigQnnRpcPollingTime = "qnn.rpc_polling_time";

// Set graph annotation id for CUDA EP. Use with enable_cuda_graph=true.
// The value should be an integer. If the value is not set, the default value is 0 and
// ORT session only captures one cuda graph before another capture is requested.
Expand Down
51 changes: 28 additions & 23 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -890,33 +890,38 @@ Status QnnBackendManager::SetHtpPowerConfig(uint32_t htp_power_config_client_id,
}

Status QnnBackendManager::SetRpcControlLatency(uint32_t htp_power_config_client_id,
uint32_t rpc_control_latency) {
uint32_t rpc_control_latency,
uint32_t rpc_polling_time) {
QnnDevice_Infrastructure_t qnn_device_infra = nullptr;
auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra);
ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed.");

auto* htp_infra = static_cast<QnnHtpDevice_Infrastructure_t*>(qnn_device_infra);
ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType,
"HTP infra type = ", htp_infra->infraType, ", which is not perf infra type.");
QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra;

// Set rpc control latency here
constexpr int kNumRpcPollingPowerConfigs = 2;
std::vector<QnnHtpPerfInfrastructure_PowerConfig_t> rpc_power_configs(kNumRpcPollingPowerConfigs);
QnnHtpPerfInfrastructure_PowerConfig_t& rpc_control_latency_cfg = rpc_power_configs[0];
rpc_control_latency_cfg.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_CONTROL_LATENCY;

if (rpc_control_latency != 0) {
QnnDevice_Infrastructure_t qnn_device_infra = nullptr;
auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra);
ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed.");

auto* htp_infra = static_cast<QnnHtpDevice_Infrastructure_t*>(qnn_device_infra);
ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType,
"HTP infra type = ", htp_infra->infraType, ", which is not perf infra type.");
QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra;

// Set rpc control latency here, but note that v68 doesn't support rpc polling mode.
constexpr int kNumRpcPollingPowerConfigs = 2;
std::vector<QnnHtpPerfInfrastructure_PowerConfig_t> rpc_power_configs(kNumRpcPollingPowerConfigs);
QnnHtpPerfInfrastructure_PowerConfig_t& rpc_control_latency_cfg = rpc_power_configs[0];
// v68 doesn't support this.
QnnHtpPerfInfrastructure_PowerConfig_t& rpc_polling_time = rpc_power_configs[1];
rpc_control_latency_cfg.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_CONTROL_LATENCY;
rpc_control_latency_cfg.rpcControlLatencyConfig = rpc_control_latency;
rpc_polling_time.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME;
rpc_polling_time.rpcPollingTimeConfig = 9999;
std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr =
ObtainNullTermPtrVector(rpc_power_configs);
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data());
ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for RPC control latency.");
}

// Only v69 or higher supports RPC polling mode.
QnnHtpPerfInfrastructure_PowerConfig_t& rpc_polling_time_cfg = rpc_power_configs[1];
rpc_polling_time_cfg.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME;
if (rpc_polling_time > 0) {
rpc_polling_time_cfg.rpcPollingTimeConfig = rpc_polling_time;
}
std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr =
ObtainNullTermPtrVector(rpc_power_configs);
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data());
ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for RPC control latency or polling time.");

return Status::OK();
}

Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ class QnnBackendManager {
HtpPerformanceMode htp_performance_mode);

Status SetRpcControlLatency(uint32_t htp_power_config_client_id,
uint32_t rpc_control_latency);
uint32_t rpc_control_latency,
uint32_t rpc_polling_time);

const QNN_INTERFACE_VER_TYPE& GetQnnInterface() { return qnn_interface_; }

Expand Down
41 changes: 34 additions & 7 deletions onnxruntime/core/providers/qnn/qnn_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,17 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio
LOGS_DEFAULT(VERBOSE) << "rpc_control_latency: " << default_rpc_control_latency_;
}

static const std::string RPC_POLLING_TIME = "rpc_polling_time";
auto polling_time_pos = provider_options_map.find(RPC_POLLING_TIME);
if (polling_time_pos != provider_options_map.end()) {
default_rpc_polling_time_ = static_cast<uint32_t>(std::stoul(polling_time_pos->second));
if (default_rpc_polling_time_ > QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIG_MAX_RPC_POLLING_TIME) {
LOGS_DEFAULT(WARNING) << "rpc_polling_time exceed the max limit, use the limit instead: " << QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIG_MAX_RPC_POLLING_TIME;

Check warning on line 311 in onnxruntime/core/providers/qnn/qnn_execution_provider.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: onnxruntime/core/providers/qnn/qnn_execution_provider.cc:311: Lines should be <= 120 characters long [whitespace/line_length] [2]
default_rpc_polling_time_ = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIG_MAX_RPC_POLLING_TIME;
}
LOGS_DEFAULT(VERBOSE) << "rpc_polling_time: " << default_rpc_polling_time_;
}

// default_htp_performance_mode from QNN EP option.
// Set it only once per thread as the default so the user doesn't need to set it for every session run.
static const std::string HTP_PERFORMANCE_MODE = "htp_performance_mode";
Expand Down Expand Up @@ -984,7 +995,8 @@ QNNExecutionProvider::PerThreadContext::PerThreadContext(qnn::QnnBackendManager*
uint32_t device_id,
uint32_t core_id,
qnn::HtpPerformanceMode default_htp_performance_mode,
uint32_t default_rpc_control_latency)
uint32_t default_rpc_control_latency,
uint32_t default_rpc_polling_time)
: qnn_backend_manager_(qnn_backend_manager) {
Status rt = qnn_backend_manager_->CreateHtpPowerCfgId(device_id, core_id, htp_power_config_id_);
is_htp_power_config_id_valid_ = rt.IsOK();
Expand All @@ -995,9 +1007,10 @@ QNNExecutionProvider::PerThreadContext::PerThreadContext(qnn::QnnBackendManager*
ORT_IGNORE_RETURN_VALUE(qnn_backend_manager_->SetHtpPowerConfig(htp_power_config_id_,
default_htp_performance_mode));
}
if (default_rpc_control_latency > 0) {
if (default_rpc_control_latency > 0 || default_rpc_polling_time > 0) {
ORT_IGNORE_RETURN_VALUE(qnn_backend_manager_->SetRpcControlLatency(htp_power_config_id_,
default_rpc_control_latency));
default_rpc_control_latency,
default_rpc_polling_time));
}
}
}
Expand Down Expand Up @@ -1028,7 +1041,9 @@ QNNExecutionProvider::PerThreadContext& QNNExecutionProvider::GetPerThreadContex
if (context_state_.retired_context_pool.empty()) {
uint32_t core_id = 0;
context = std::make_shared<PerThreadContext>(qnn_backend_manager_.get(), device_id_, core_id,
default_htp_performance_mode_, default_rpc_control_latency_);
default_htp_performance_mode_,
default_rpc_control_latency_,
default_rpc_polling_time_);
} else {
context = context_state_.retired_context_pool.back();
context_state_.retired_context_pool.pop_back();
Expand Down Expand Up @@ -1081,7 +1096,18 @@ Status QNNExecutionProvider::OnRunStart(const onnxruntime::RunOptions& run_optio
uint32_t rpc_control_latency = 0;
if (run_options.config_options.TryGetConfigEntry(kOrtRunOptionsConfigQnnRpcControlLatency, rpc_latency)) {
rpc_control_latency = static_cast<uint32_t>(std::stoul(rpc_latency));
LOGS_DEFAULT(VERBOSE) << "rpc_control_latency: " << rpc_control_latency;
LOGS_DEFAULT(VERBOSE) << kOrtRunOptionsConfigQnnRpcControlLatency << rpc_control_latency;
}

std::string rpc_polling = "";
uint32_t rpc_polling_time = 0;
if (run_options.config_options.TryGetConfigEntry(kOrtRunOptionsConfigQnnRpcPollingTime, rpc_polling)) {
rpc_polling_time = static_cast<uint32_t>(std::stoul(rpc_polling));
if (rpc_polling_time > QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIG_MAX_RPC_POLLING_TIME) {
LOGS_DEFAULT(WARNING) << "rpc_polling_time exceed the max limit, use the limit instead: " << QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIG_MAX_RPC_POLLING_TIME;

Check warning on line 1107 in onnxruntime/core/providers/qnn/qnn_execution_provider.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: onnxruntime/core/providers/qnn/qnn_execution_provider.cc:1107: Lines should be <= 120 characters long [whitespace/line_length] [2]
rpc_polling_time = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIG_MAX_RPC_POLLING_TIME;
}
LOGS_DEFAULT(VERBOSE) << kOrtRunOptionsConfigQnnRpcControlLatency << rpc_polling_time;
}

if (GetPerThreadContext().IsHtpPowerConfigIdValid()) {
Expand All @@ -1090,9 +1116,10 @@ Status QNNExecutionProvider::OnRunStart(const onnxruntime::RunOptions& run_optio
htp_performance_mode));
}

if (rpc_control_latency > 0) {
if (rpc_control_latency > 0 || rpc_polling_time > 0) {
ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetRpcControlLatency(GetPerThreadContext().GetHtpPowerConfigId(),
rpc_control_latency));
rpc_control_latency,
rpc_polling_time));
}
}

Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/core/providers/qnn/qnn_execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ class QNNExecutionProvider : public IExecutionProvider {
uint32_t device_id_ = 0;
qnn::HtpPerformanceMode default_htp_performance_mode_ = qnn::HtpPerformanceMode::kHtpDefault;
uint32_t default_rpc_control_latency_ = 0;
uint32_t default_rpc_polling_time_ = 0;
bool enable_HTP_FP16_precision_ = true;
bool share_ep_contexts_ = false;
#ifdef _WIN32
Expand All @@ -159,7 +160,8 @@ class QNNExecutionProvider : public IExecutionProvider {
PerThreadContext(qnn::QnnBackendManager* qnn_backend_manager,
uint32_t device_id, uint32_t core_id,
qnn::HtpPerformanceMode default_htp_performance_mode,
uint32_t default_rpc_control_latency);
uint32_t default_rpc_control_latency,
uint32_t default_rpc_polling_time);
~PerThreadContext();
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(PerThreadContext);

Expand Down
5 changes: 5 additions & 0 deletions onnxruntime/test/providers/qnn/qnn_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,7 @@ TEST_F(QnnHTPBackendTests, MultithreadHtpPowerCfgSessionRunOption) {
#else
options["backend_path"] = "libQnnHtp.so";
#endif
options["rpc_polling_time"] = "8888";

auto qnn_ep = QnnExecutionProviderWithOptions(options, &session_opts);
EXPECT_TRUE(session_obj.RegisterExecutionProvider(std::move(qnn_ep)).IsOK());
Expand All @@ -666,6 +667,10 @@ TEST_F(QnnHTPBackendTests, MultithreadHtpPowerCfgSessionRunOption) {
ASSERT_TRUE(rt.IsOK());
rt = run_opts.config_options.AddConfigEntry(kOrtRunOptionsConfigQnnPerfModePostRun, perf_modes[post_i].c_str());
ASSERT_TRUE(rt.IsOK());
rt = run_opts.config_options.AddConfigEntry(kOrtRunOptionsConfigQnnRpcControlLatency, "1200");
ASSERT_TRUE(rt.IsOK());
rt = run_opts.config_options.AddConfigEntry(kOrtRunOptionsConfigQnnRpcPollingTime, "1000");
ASSERT_TRUE(rt.IsOK());

threads.push_back(std::thread(RunSessionAndVerify, std::ref(session_obj), run_opts,
model->builder.feeds_, model->builder.output_names_,
Expand Down

0 comments on commit 4e01853

Please sign in to comment.