Skip to content

Commit

Permalink
Qualcomm AI Engine Direct - Support QNN 2.28
Browse files Browse the repository at this point in the history
Differential Revision: D65949627

Pull Request resolved: #6811
  • Loading branch information
shewu-quic authored Jan 13, 2025
1 parent 44d223d commit 8cd3afd
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .ci/scripts/build-qnn-sdk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ set -o xtrace
build_qnn_backend() {
echo "Start building qnn backend."
export ANDROID_NDK_ROOT=/opt/ndk
export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728
export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"

bash backends/qualcomm/scripts/build.sh --skip_aarch64 --job_number 2 --release
Expand Down
4 changes: 2 additions & 2 deletions .ci/scripts/setup-qnn-deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ install_qnn() {
QNN_INSTALLATION_DIR=/tmp/qnn
mkdir -p "${QNN_INSTALLATION_DIR}"

curl -Lo /tmp/v2.25.0.24.07.28.zip "https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.25.0.240728.zip"
curl -Lo /tmp/v2.28.0.24.10.29.zip "https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.28.0.241029.zip"
echo "Finishing downloading qnn sdk."
unzip -qo /tmp/v2.25.0.24.07.28.zip -d /tmp
unzip -qo /tmp/v2.28.0.24.10.29.zip -d /tmp
echo "Finishing unzip qnn sdk."


Expand Down
2 changes: 1 addition & 1 deletion .ci/scripts/test_llama.sh
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ echo "COREML option ${COREML}"
if [[ "${MODE}" =~ .*qnn.* ]]; then
QNN=ON
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728
export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
export PYTHONPATH=".."
cp schema/program.fbs exir/_serialize/program.fbs
Expand Down
9 changes: 9 additions & 0 deletions backends/qualcomm/runtime/backends/QnnBackendCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ Error QnnBackendCache::GetQnnGraphInfoFromBinary(
} else if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) {
num_graphs = binaryinfo->contextBinaryInfoV2.numGraphs;
graphs = binaryinfo->contextBinaryInfoV2.graphs;
#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21)
} else if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3) {
num_graphs = binaryinfo->contextBinaryInfoV3.numGraphs;
graphs = binaryinfo->contextBinaryInfoV3.graphs;
#endif
} else {
QNN_EXECUTORCH_LOG_WARN(
"Unknown QNN BinaryInfo version %d.", binaryinfo->version);
Expand All @@ -62,6 +67,10 @@ Error QnnBackendCache::GetQnnGraphInfoFromBinary(
RetrieveGraphInfo<QnnSystemContext_GraphInfoV1_t>(graphs[i].graphInfoV1);
} else if (graphs->version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_2) {
RetrieveGraphInfo<QnnSystemContext_GraphInfoV2_t>(graphs[i].graphInfoV2);
#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21)
} else if (graphs->version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3) {
RetrieveGraphInfo<QnnSystemContext_GraphInfoV3_t>(graphs[i].graphInfoV3);
#endif
} else {
QNN_EXECUTORCH_LOG_WARN(
"Unknown QNN GraphInfo version %d.", binaryinfo->version);
Expand Down
43 changes: 31 additions & 12 deletions backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,34 +17,53 @@ using executorch::runtime::Error;
Error HtpBackendCache::RetrieveBackendBinaryInfo(
const QnnSystemContext_BinaryInfo_t* binaryinfo) {
QnnHtpSystemContext_HwBlobInfo_t* htp_hwblobinfo = nullptr;
#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21)
QnnHtpSystemContext_GraphBlobInfo_t* htp_graphblobinfo = nullptr;
#endif

if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1) {
htp_hwblobinfo = static_cast<QnnHtpSystemContext_HwBlobInfo_t*>(
binaryinfo->contextBinaryInfoV1.hwInfoBlob);
} else if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) {
htp_hwblobinfo = static_cast<QnnHtpSystemContext_HwBlobInfo_t*>(
binaryinfo->contextBinaryInfoV2.hwInfoBlob);
#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21)
} else if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3) {
htp_graphblobinfo = static_cast<QnnHtpSystemContext_GraphBlobInfo_t*>(
binaryinfo->contextBinaryInfoV3.graphs->graphInfoV3.graphBlobInfo);
#endif
} else {
QNN_EXECUTORCH_LOG_WARN(
"Unknown QNN BinaryInfo version %d.", binaryinfo->version);
return Error::Internal;
}

if (htp_hwblobinfo == nullptr) {
QNN_EXECUTORCH_LOG_WARN(
"Htp hardware blob information is not found in binary information.");
return Error::Ok;
if (htp_hwblobinfo) {
if (htp_hwblobinfo->version ==
QNN_SYSTEM_CONTEXT_HTP_HW_INFO_BLOB_VERSION_V1) {
spill_fill_buf_ =
(*htp_hwblobinfo).contextBinaryHwInfoBlobV1_t.spillFillBufferSize;
} else {
QNN_EXECUTORCH_LOG_WARN(
"Unknown QNN Htp hw blob info version %d.", htp_hwblobinfo->version);
return Error::Internal;
}
}

if (htp_hwblobinfo->version ==
QNN_SYSTEM_CONTEXT_HTP_HW_INFO_BLOB_VERSION_V1) {
spill_fill_buf_ =
(*htp_hwblobinfo).contextBinaryHwInfoBlobV1_t.spillFillBufferSize;
} else {
QNN_EXECUTORCH_LOG_WARN(
"Unknown QNN Htp hw blob info version %d.", htp_hwblobinfo->version);
return Error::Internal;
#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21)
if (htp_graphblobinfo) {
if (htp_graphblobinfo->version ==
QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1) {
spill_fill_buf_ =
(*htp_graphblobinfo).contextBinaryGraphBlobInfoV1.spillFillBufferSize;
} else {
QNN_EXECUTORCH_LOG_WARN(
"Unknown QNN Htp graph blob info version %d.",
htp_graphblobinfo->version);
return Error::Internal;
}
}
#endif

return Error::Ok;
}
Expand Down
2 changes: 1 addition & 1 deletion docs/source/build-run-qualcomm-ai-engine-direct-backend.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ This example is verified with SM8550 and SM8450.
- Click the "Get Software" button to download a version of QNN SDK.
- However, at the time this tutorial was last updated, the above website doesn't provide QNN SDK versions newer than 2.22.6.
- Below are public links to download various QNN versions. We hope they will be publicly discoverable soon.
- [QNN 2.26.0](https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.26.0.240828.zip)
- [QNN 2.28.0](https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.28.0.241029.zip)

The directory with installed Qualcomm AI Engine Direct SDK looks like:
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ This tutorial demonstrates how to export Llama 3 8B Instruct for Qualcomm AI Eng
- Follow [the README for executorch llama](https://github.com/pytorch/executorch/tree/main/examples/models/llama) to know how to run a llama model on mobile via ExecuTorch.
- A Qualcomm device with 16GB RAM
- We are continuing to optimize our memory usage to ensure compatibility with lower memory devices.
- The version of [Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk) is 2.26.0 or above.
- The version of [Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk) is 2.28.0 or above.

## Instructions

Expand Down
2 changes: 1 addition & 1 deletion shim/xplat/executorch/backends/qualcomm/qnn_version.bzl
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
def get_qnn_library_verision():
return "2.26"
return "2.28"

0 comments on commit 8cd3afd

Please sign in to comment.