quic · shubhagr-quic · Oct 22, 2024
@@ -76,6 +76,20 @@
         default=None,
         help="Set full batch size to enable continuous batching mode, default is None",
     )
+    parser.add_argument(
+        "--enable_qnn",
+        "--enable-qnn",
+        action="store_true",
+        default=False,
+        help="Enables QNN. Optionally, a configuration file can be provided with [--enable_qnn CONFIG_FILE].\
+             If not provided, the default configuration will be used.\
+             Sample Config: QEfficient/cloud/compile/qnn_config.json",
+    )
+    parser.add_argument(
+        "qnn_config",
+        nargs="?",
+        type=str,
+    )
     # FIXME(ochougul): Allow extra compilation arguments
     args = parser.parse_args()
     QEfficient.compile(**vars(args))
@@ -35,6 +35,8 @@ def main(
     local_model_dir: Optional[str] = None,
     cache_dir: Optional[str] = None,
     hf_token: Optional[str] = None,
+    enable_qnn: Optional[bool] = False,
+    qnn_config: Optional[str] = None,
 ) -> None:
     """
     1. Check if compiled qpc for given config already exists, if it does jump to execute, else
@@ -60,7 +62,8 @@ def main(
         :local_model_dir (str): Path to custom model weights and config files. ``Defaults to None.``
         :cache_dir (str): Cache dir where downloaded HuggingFace files are stored. ``Defaults to None.``
         :hf_token (str): HuggingFace login token to access private repos. ``Defaults to None.``
-
+        :enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.``
+        :qnn_config (str): Path to the QNN config parameters file (if provided). ``Defaults to None.``
     .. code-block:: bash
 
         python -m QEfficient.cloud.infer OPTIONS
@@ -74,7 +77,17 @@ def main(
     )
 
     qpc_dir_path = get_qpc_dir_path(
-        model_name, num_cores, mos, batch_size, prompt_len, ctx_len, mxfp6, mxint8, device_group, full_batch_size
+        model_name,
+        num_cores,
+        mos,
+        batch_size,
+        prompt_len,
+        ctx_len,
+        mxfp6,
+        mxint8,
+        device_group,
+        full_batch_size,
+        enable_qnn,
     )
 
     # Handle qpc generation
@@ -104,6 +117,8 @@ def main(
             mos=mos,
             device_group=device_group,
             full_batch_size=full_batch_size,
+            enable_qnn=enable_qnn,
+            qnn_config=qnn_config,
         )
 
     #########
@@ -197,6 +212,20 @@ def main(
         default=None,
         help="Set full batch size to enable continuous batching mode, default is None",
     )
+    parser.add_argument(
+        "--enable_qnn",
+        "--enable-qnn",
+        action="store_true",
+        default=False,
+        help="Enables QNN. Optionally, a configuration file can be provided with [--enable_qnn CONFIG_FILE].\
+             If not provided, the default configuration will be used.\
+             Sample Config: QEfficient/cloud/compile/qnn_config.json",
+    )
+    parser.add_argument(
+        "qnn_config",
+        nargs="?",
+        type=str,
+    )
 
     args = parser.parse_args()
     if args.verbose:

@@ -12,6 +12,7 @@
 import warnings
 from typing import List, Optional, Tuple
 
+from QEfficient.compile.qnn_compiler import compile as qnnCompile
 from QEfficient.utils.logging_utils import logger
 
 
@@ -125,6 +126,8 @@ def compile(
     mxint8: bool = False,
     custom_io_file_path: Optional[str] = None,
     full_batch_size: Optional[int] = None,
+    enable_qnn: Optional[bool] = False,
+    qnn_config: Optional[str] = None,
     **kwargs,
 ) -> str:
     """
@@ -148,6 +151,8 @@ def compile(
         :mxfp6 (bool): Enable compilation for ``MXFP6`` precision.  ``Defaults to True.``
         :mxint8 (bool): Compress Present/Past KV to ``MXINT8`` using ``CustomIO`` config. ``Defaults to False.``
         :custom_io_file_path (str): Path to ``customIO`` file (formatted as a string). ``Defaults to None.``
+        :enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.``
+        :qnn_config (str): Path to the QNN config parameters file (if provided). ``Defaults to None.``
 
     Returns:
         :str: Path to compiled ``qpc`` package.
@@ -166,28 +171,45 @@ def compile(
         full_batch_size=full_batch_size,
     )
 
-    # Select the customIO config based on the mx flag.
-    custom_io_file_name = "custom_io_int8.yaml" if mxint8 else "custom_io_fp16.yaml"
-
-    if custom_io_file_path is None:
-        custom_io_file_path = os.path.join(os.path.dirname(onnx_path), custom_io_file_name)
-
-    if not os.path.isfile(custom_io_file_path):
-        raise FileNotFoundError(
-            f"Custom IO file {custom_io_file_name} is not present at the expected path {custom_io_file_path}. Please pass the correct file path or rerun infer/export API"
+    if enable_qnn:
+        qpc_path = qnnCompile(
+            onnx_path=onnx_path,
+            qpc_path=qpc_path,
+            num_cores=num_cores,
+            batch_size=batch_size,
+            prompt_len=prompt_len,
+            ctx_len=ctx_len,
+            mxfp6=mxfp6,
+            mxint8=mxint8,
+            aic_enable_depth_first=aic_enable_depth_first,
+            mos=mos,
+            device_group=device_group,
+            full_batch_size=full_batch_size,
+            qnn_config=qnn_config,
         )
-
-    _, qpc_path = compile_kv_model_on_cloud_ai_100(
-        onnx_path=onnx_path,
-        specializations_json=specialization_json_path,
-        num_cores=num_cores,
-        custom_io_path=custom_io_file_path,
-        base_path=qpc_path,
-        mxfp6=mxfp6,
-        aic_enable_depth_first=aic_enable_depth_first,
-        mos=mos,
-        device_group=device_group,
-    )
-
-    logger.info(f"Compiled QPC files can be found here: {qpc_path}")
+        logger.info(f"QNN Compiled QPC files can be found here: {qpc_path}")
+    else:
+        # Select the customIO config based on the mx flag.
+        custom_io_file_name = "custom_io_int8.yaml" if mxint8 else "custom_io_fp16.yaml"
+
+        if custom_io_file_path is None:
+            custom_io_file_path = os.path.join(os.path.dirname(onnx_path), custom_io_file_name)
+
+        if not os.path.isfile(custom_io_file_path):
+            raise FileNotFoundError(
+                f"Custom IO file {custom_io_file_name} is not present at the expected path {custom_io_file_path}. Please pass the correct file path or rerun infer/export API"
+            )
+
+        _, qpc_path = compile_kv_model_on_cloud_ai_100(
+            onnx_path=onnx_path,
+            specializations_json=specialization_json_path,
+            num_cores=num_cores,
+            custom_io_path=custom_io_file_path,
+            base_path=qpc_path,
+            mxfp6=mxfp6,
+            aic_enable_depth_first=aic_enable_depth_first,
+            mos=mos,
+            device_group=device_group,
+        )
+        logger.info(f"Compiled QPC files can be found here: {qpc_path}")
     return qpc_path