Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

QNN Compilation Support. (#159) #171

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions QEfficient/cloud/compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,20 @@
default=None,
help="Set full batch size to enable continuous batching mode, default is None",
)
parser.add_argument(
"--enable_qnn",
"--enable-qnn",
action="store_true",
default=False,
help="Enables QNN. Optionally, a configuration file can be provided with [--enable_qnn CONFIG_FILE].\
If not provided, the default configuration will be used.\
Sample Config: QEfficient/cloud/compile/qnn_config.json",
)
parser.add_argument(
"qnn_config",
nargs="?",
type=str,
)
# FIXME(ochougul): Allow extra compilation arguments
args = parser.parse_args()
QEfficient.compile(**vars(args))
33 changes: 31 additions & 2 deletions QEfficient/cloud/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def main(
local_model_dir: Optional[str] = None,
cache_dir: Optional[str] = None,
hf_token: Optional[str] = None,
enable_qnn: Optional[bool] = False,
qnn_config: Optional[str] = None,
) -> None:
"""
1. Check if compiled qpc for given config already exists, if it does jump to execute, else
Expand All @@ -60,7 +62,8 @@ def main(
:local_model_dir (str): Path to custom model weights and config files. ``Defaults to None.``
:cache_dir (str): Cache dir where downloaded HuggingFace files are stored. ``Defaults to None.``
:hf_token (str): HuggingFace login token to access private repos. ``Defaults to None.``

:enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.``
:qnn_config (str): Path to the QNN config parameters file (optional). ``Defaults to None.``
.. code-block:: bash

python -m QEfficient.cloud.infer OPTIONS
Expand All @@ -74,7 +77,17 @@ def main(
)

qpc_dir_path = get_qpc_dir_path(
model_name, num_cores, mos, batch_size, prompt_len, ctx_len, mxfp6, mxint8, device_group, full_batch_size
model_name,
num_cores,
mos,
batch_size,
prompt_len,
ctx_len,
mxfp6,
mxint8,
device_group,
full_batch_size,
enable_qnn,
)

# Handle qpc generation
Expand Down Expand Up @@ -104,6 +117,8 @@ def main(
mos=mos,
device_group=device_group,
full_batch_size=full_batch_size,
enable_qnn=enable_qnn,
qnn_config=qnn_config,
)

#########
Expand Down Expand Up @@ -197,6 +212,20 @@ def main(
default=None,
help="Set full batch size to enable continuous batching mode, default is None",
)
parser.add_argument(
"--enable_qnn",
"--enable-qnn",
action="store_true",
default=False,
help="Enables QNN. Optionally, a configuration file can be provided with [--enable_qnn CONFIG_FILE].\
If not provided, the default configuration will be used.\
Sample Config: QEfficient/cloud/compile/qnn_config.json",
)
parser.add_argument(
"qnn_config",
nargs="?",
type=str,
)

args = parser.parse_args()
if args.verbose:
Expand Down
68 changes: 45 additions & 23 deletions QEfficient/compile/compile_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import warnings
from typing import List, Optional, Tuple

from QEfficient.compile.qnn_compiler import compile as qnnCompile
from QEfficient.utils.logging_utils import logger


Expand Down Expand Up @@ -125,6 +126,8 @@ def compile(
mxint8: bool = False,
custom_io_file_path: Optional[str] = None,
full_batch_size: Optional[int] = None,
enable_qnn: Optional[bool] = False,
qnn_config: Optional[str] = None,
**kwargs,
) -> str:
"""
Expand All @@ -148,6 +151,8 @@ def compile(
:mxfp6 (bool): Enable compilation for ``MXFP6`` precision. ``Defaults to True.``
:mxint8 (bool): Compress Present/Past KV to ``MXINT8`` using ``CustomIO`` config. ``Defaults to False.``
:custom_io_file_path (str): Path to ``customIO`` file (formatted as a string). ``Defaults to None.``
:enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.``
:qnn_config (str): Path to the QNN config parameters file (optional). ``Defaults to None.``

Returns:
:str: Path to compiled ``qpc`` package.
Expand All @@ -166,28 +171,45 @@ def compile(
full_batch_size=full_batch_size,
)

# Select the customIO config based on the mx flag.
custom_io_file_name = "custom_io_int8.yaml" if mxint8 else "custom_io_fp16.yaml"

if custom_io_file_path is None:
custom_io_file_path = os.path.join(os.path.dirname(onnx_path), custom_io_file_name)

if not os.path.isfile(custom_io_file_path):
raise FileNotFoundError(
f"Custom IO file {custom_io_file_name} is not present at the expected path {custom_io_file_path}. Please pass the correct file path or rerun infer/export API"
if enable_qnn:
qpc_path = qnnCompile(
onnx_path=onnx_path,
qpc_path=qpc_path,
num_cores=num_cores,
batch_size=batch_size,
prompt_len=prompt_len,
ctx_len=ctx_len,
mxfp6=mxfp6,
mxint8=mxint8,
aic_enable_depth_first=aic_enable_depth_first,
mos=mos,
device_group=device_group,
full_batch_size=full_batch_size,
qnn_config=qnn_config,
)

_, qpc_path = compile_kv_model_on_cloud_ai_100(
onnx_path=onnx_path,
specializations_json=specialization_json_path,
num_cores=num_cores,
custom_io_path=custom_io_file_path,
base_path=qpc_path,
mxfp6=mxfp6,
aic_enable_depth_first=aic_enable_depth_first,
mos=mos,
device_group=device_group,
)

logger.info(f"Compiled QPC files can be found here: {qpc_path}")
logger.info(f"QNN Compiled QPC files can be found here: {qpc_path}")
else:
# Select the customIO config based on the mx flag.
custom_io_file_name = "custom_io_int8.yaml" if mxint8 else "custom_io_fp16.yaml"

if custom_io_file_path is None:
custom_io_file_path = os.path.join(os.path.dirname(onnx_path), custom_io_file_name)

if not os.path.isfile(custom_io_file_path):
raise FileNotFoundError(
f"Custom IO file {custom_io_file_name} is not present at the expected path {custom_io_file_path}. Please pass the correct file path or rerun infer/export API"
)

_, qpc_path = compile_kv_model_on_cloud_ai_100(
onnx_path=onnx_path,
specializations_json=specialization_json_path,
num_cores=num_cores,
custom_io_path=custom_io_file_path,
base_path=qpc_path,
mxfp6=mxfp6,
aic_enable_depth_first=aic_enable_depth_first,
mos=mos,
device_group=device_group,
)
logger.info(f"Compiled QPC files can be found here: {qpc_path}")
return qpc_path
Loading
Loading