Lint & Format (quic#53)
* Lint & Format

- Added linting and formatting GitHub Actions workflows
- Formatted the entire codebase
- Fixed linter errors
- Removed `# noqa` comments by fixing the underlying issues

Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com>

* Split test config into multiple-lines

Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com>

* Fix external repo for workflow

Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com>

* Format newly added files

Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com>

---------

Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com>
irajagop authored Jul 3, 2024
1 parent 6846c8a commit e00978d
Showing 25 changed files with 393 additions and 239 deletions.
19 changes: 19 additions & 0 deletions .github/workflows/lint-format.yml
@@ -0,0 +1,19 @@
name: Lint & Format
on: [pull_request]
jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - run: pip3 install ruff
      - run: ruff check
        env:
          RUFF_OUTPUT_FORMAT: github
  format:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - run: pip3 install ruff
      - run: ruff format --check
        env:
          RUFF_OUTPUT_FORMAT: github
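The workflow above gates every pull request on `ruff check` and `ruff format --check`. The same gate can be reproduced locally before pushing; the small script below is a hypothetical convenience helper, not part of this PR.

# check_style.py -- hypothetical local helper mirroring the two CI jobs above (not part of this PR).
# It shells out to the same ruff commands the workflow runs on every pull request.
import subprocess
import sys


def main() -> int:
    for cmd in (["ruff", "check"], ["ruff", "format", "--check"]):
        result = subprocess.run(cmd)
        if result.returncode != 0:
            return result.returncode
    return 0


if __name__ == "__main__":
    sys.exit(main())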
18 changes: 14 additions & 4 deletions QEfficient/__init__.py
@@ -5,12 +5,22 @@
#
# -----------------------------------------------------------------------------

from QEfficient.compile.compile_helper import compile # noqa: F401
from QEfficient.compile.compile_helper import compile
from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv # noqa: F401
from QEfficient.src import QEffAutoModel, QEFFAutoModelForCausalLM, QEFFCommonLoader # noqa: F401
from QEfficient.transformers.transform import transform # noqa: F401
from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
from QEfficient.src import QEffAutoModel, QEFFAutoModelForCausalLM, QEFFCommonLoader
from QEfficient.transformers.transform import transform

# Users can use QEfficient.export for exporting models to ONNX
export = qualcomm_efficient_converter
__version__ = "0.0.1.dev0"

__all__ = [
"transform",
"export",
"compile",
"cloud_ai_100_exec_kv",
"QEffAutoModel",
"QEFFAutoModelForCausalLM",
"QEFFCommonLoader",
]
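With the explicit `__all__`, the package's public surface is `transform`, `export` (an alias for `qualcomm_efficient_converter`), `compile`, `cloud_ai_100_exec_kv`, and the three loader classes. The sketch below is illustrative only; the model id is a placeholder and the keyword arguments are assumptions lifted from signatures visible elsewhere in this diff.

# Hypothetical end-to-end sketch of the re-exported API; not a verified recipe.
import QEfficient

# Loader class re-exported from QEfficient.src (see QEfficient/src/__init__.py below).
qeff_model = QEfficient.QEFFAutoModelForCausalLM.from_pretrained("gpt2")  # placeholder model id

# `export` aliases qualcomm_efficient_converter; keyword names mirror the call in
# QEfficient/cloud/export.py, which returns a 2-tuple whose second element is the ONNX model path.
_, onnx_model_path = QEfficient.export(model_name="gpt2", kv=True, form_factor="cloud")

# `compile` keyword names other than ctx_len/mxfp6 (shown in compile_helper.py) are assumptions.
qpc_path = QEfficient.compile(onnx_path=onnx_model_path, num_cores=14, ctx_len=128, mxfp6=True)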
40 changes: 25 additions & 15 deletions QEfficient/cloud/export.py
@@ -19,7 +19,13 @@
ROOT_DIR = os.path.dirname(os.path.abspath(""))


def get_onnx_model_path(model_name: str, cache_dir: Optional[str] = None, tokenizer: Optional[Union[PreTrainedTokenizerFast, PreTrainedTokenizer]]=None, hf_token: Optional[str] = None, local_model_dir: Optional[str] = None):
def get_onnx_model_path(
model_name: str,
cache_dir: Optional[str] = None,
tokenizer: Optional[Union[PreTrainedTokenizerFast, PreTrainedTokenizer]] = None,
hf_token: Optional[str] = None,
local_model_dir: Optional[str] = None,
):
"""
exports the model to onnx if pre-exported file is not found and returns onnx_model_path
"""
@@ -33,19 +39,21 @@ def get_onnx_model_path(model_name: str, cache_dir: Optional[str] = None, tokeni
# Export to the Onnx
logger.info(f"Exporting Pytorch {model_name} model to ONNX...")
_, generated_onnx_model_path = qualcomm_efficient_converter(
model_name=model_name,
local_model_dir=local_model_dir,
tokenizer=tokenizer,
onnx_dir_path=onnx_dir_path,
kv=True,
form_factor="cloud",
hf_token=hf_token,
cache_dir=cache_dir
) # type: ignore
logger.info(f"Generated Onnx_path {generated_onnx_model_path} \nOnnx_model_path {onnx_model_path} \nand Onnx_dir_path is {onnx_dir_path}")
model_name=model_name,
local_model_dir=local_model_dir,
tokenizer=tokenizer,
onnx_dir_path=onnx_dir_path,
kv=True,
form_factor="cloud",
hf_token=hf_token,
cache_dir=cache_dir,
) # type: ignore
logger.info(
f"Generated Onnx_path {generated_onnx_model_path} \nOnnx_model_path {onnx_model_path} \nand Onnx_dir_path is {onnx_dir_path}"
)
assert (
generated_onnx_model_path == onnx_model_path
), f"ONNX files were generated at an unusual location, expected {onnx_model_path}, got {generated_onnx_model_path}"
generated_onnx_model_path == onnx_model_path
), f"ONNX files were generated at an unusual location, expected {onnx_model_path}, got {generated_onnx_model_path}"
return onnx_model_path


@@ -63,14 +71,16 @@ def main(
:hf_token: str. HuggingFace login token to access private repos.
:local_model_dir: str. Path to custom model weights and config files.
"""
cache_dir = check_and_assign_cache_dir(local_model_dir,cache_dir)
cache_dir = check_and_assign_cache_dir(local_model_dir, cache_dir)
get_onnx_model_path(model_name=model_name, cache_dir=cache_dir, hf_token=hf_token, local_model_dir=local_model_dir)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Export script.")
parser.add_argument("--model_name", "--model-name", required=True, help="HF Model card name/id")
parser.add_argument("--local-model-dir", "--local_model_dir", required=False, help="Path to custom model weights and config files")
parser.add_argument(
"--local-model-dir", "--local_model_dir", required=False, help="Path to custom model weights and config files"
)
parser.add_argument(
"--cache_dir",
"--cache-dir",
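For callers who want the ONNX artifact without going through the CLI in `main`, the reformatted `get_onnx_model_path` can be invoked directly. A minimal sketch, using only parameters visible in the signature above (values are placeholders):

# Minimal sketch of programmatic export; "gpt2" is a placeholder model card id.
from QEfficient.cloud.export import get_onnx_model_path

onnx_model_path = get_onnx_model_path(
    model_name="gpt2",
    cache_dir=None,   # optional override for the HF download cache
    hf_token=None,    # only needed for private model repos
)
print(onnx_model_path)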
15 changes: 11 additions & 4 deletions QEfficient/cloud/infer.py
@@ -30,7 +30,7 @@
def main(
model_name: str,
num_cores: int,
prompt: Optional[str] = None, # type: ignore
prompt: Optional[str] = None, # type: ignore
local_model_dir: Optional[str] = None,
prompts_txt_file_path: Optional[str] = None,
aic_enable_depth_first: bool = False,
@@ -51,9 +51,14 @@ def main(
num_cores, mos, batch_size, prompt_len, ctx_len, mxfp6, mxint8, device_group
)
prompt: List[str] = check_batch_size_and_num_prompts(prompt, prompts_txt_file_path, batch_size)
cache_dir = check_and_assign_cache_dir(local_model_dir,cache_dir)
cache_dir = check_and_assign_cache_dir(local_model_dir, cache_dir)

tokenizer = load_hf_tokenizer(pretrained_model_name_or_path=(local_model_dir if local_model_dir else model_name), cache_dir=cache_dir, hf_token=hf_token, local_model_dir=local_model_dir)
tokenizer = load_hf_tokenizer(
pretrained_model_name_or_path=(local_model_dir if local_model_dir else model_name),
cache_dir=cache_dir,
hf_token=hf_token,
local_model_dir=local_model_dir,
)

qpc_path_exists, qpc_dir_path = qpc_exists(model_name, qpc_base_dir_name)
# Handle qpc generation
@@ -104,7 +109,9 @@ def main(
description="Inference command, the model will be downloaded from HF, optmized, compiled, executed on Cloud AI 100"
)
parser.add_argument("--model-name", "--model_name", required=True, help="HF Model card name/id")
parser.add_argument("--local-model-dir", "--local_model_dir", required=False, help="Path to custom model weights and config files")
parser.add_argument(
"--local-model-dir", "--local_model_dir", required=False, help="Path to custom model weights and config files"
)
parser.add_argument(
"--cache-dir",
"--cache_dir",
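The reflowed `load_hf_tokenizer` call is the step that resolves the tokenizer either from `local_model_dir` or from the HF model card before any QPC handling. A standalone sketch of that step (the import path of `load_hf_tokenizer` is an assumption; this diff only shows the call site):

# Sketch of the tokenizer-loading step from QEfficient/cloud/infer.py; import path assumed.
from QEfficient.utils import load_hf_tokenizer

model_name = "gpt2"        # placeholder HF model card id
local_model_dir = None     # set to a directory path to load custom weights/config instead

tokenizer = load_hf_tokenizer(
    pretrained_model_name_or_path=(local_model_dir if local_model_dir else model_name),
    cache_dir=None,
    hf_token=None,
    local_model_dir=local_model_dir,
)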
2 changes: 1 addition & 1 deletion QEfficient/compile/compile_helper.py
@@ -109,7 +109,7 @@ def compile(
ctx_len: int = 128,
mxfp6: bool = True,
mxint8: bool = False,
**kwargs
**kwargs,
) -> str:
# Dynamically create the specializations JSON
"""
4 changes: 2 additions & 2 deletions QEfficient/exporter/export_utils.py
@@ -169,7 +169,7 @@ def fix_onnx_fp16(
ort_outputs: List[np.ndarray],
gen_models_path: str,
model_base_name: str,
pt_outputs: Dict[str, torch.Tensor]
pt_outputs: Dict[str, torch.Tensor],
) -> str:
finfo = np.finfo(np.float16)
fp16_max = finfo.max
@@ -218,7 +218,7 @@ def fix_onnx_fp16(
os.path.join(gen_models_path, f"{model_base_name}.onnx"),
os.path.join(gen_models_path, f"{model_base_name}.onnxweights.data"),
)

model_base_name += "_clipped_fp16"
onnx.save_model(
model,
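The `fix_onnx_fp16` hunks above are formatting-only, but they sit inside the FP16-clipping pass: values outside the `np.finfo(np.float16)` bounds are clamped before the model is re-saved with a `_clipped_fp16` suffix. A minimal illustration of the clamping idea, detached from any ONNX graph handling:

# Toy illustration of the value-clamping idea behind fix_onnx_fp16 (not the actual ONNX pass).
import numpy as np

finfo = np.finfo(np.float16)                      # fp16 max magnitude is 65504
weights = np.array([1.0e5, -9.0e4, 0.5], dtype=np.float32)
clipped = np.clip(weights, finfo.min, finfo.max)
print(clipped)                                    # values are now representable in float16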
6 changes: 4 additions & 2 deletions QEfficient/src/__init__.py
@@ -5,5 +5,7 @@
#
# -----------------------------------------------------------------------------

from QEfficient.src._transformers.auto import QEffAutoModel, QEFFAutoModelForCausalLM # noqa: F401
from QEfficient.src.common import QEFFCommonLoader # noqa: F401
from QEfficient.src._transformers.auto import QEffAutoModel, QEFFAutoModelForCausalLM
from QEfficient.src.common import QEFFCommonLoader

__all__ = ["QEffAutoModel", "QEFFAutoModelForCausalLM", "QEFFCommonLoader"]
44 changes: 26 additions & 18 deletions QEfficient/src/_transformers/auto.py
@@ -26,17 +26,21 @@ class QEFFTransformersBase(QEFFBaseModel):
"""
Parent class for models QEFF provides from transformers i.e. (AutoModel, AutoModelForCausalLM, AutoModelForAudioClassification etc.) from src/transformers/models/auto/modeling_auto.py file.
"""
def __init__(self, model: nn.Module, transform:bool = True) -> None:
assert (model.__class__ in MODEL_FOR_CAUSAL_LM_MAPPING.values() or
# FIXME: Use model architectures here instead of complete dictionary TransformersToQEffModulesDict
model.__class__ in TransformersToQEffModulesDict.values()), f"Given model{model.__class__.__name__} could not be found in transformers library i.e. {MODEL_FOR_CAUSAL_LM_MAPPING.values()}" # type: ignore

def __init__(self, model: nn.Module, transform: bool = True) -> None:
assert (
model.__class__ in MODEL_FOR_CAUSAL_LM_MAPPING.values()
or
# FIXME: Use model architectures here instead of complete dictionary TransformersToQEffModulesDict
model.__class__ in TransformersToQEffModulesDict.values()
), f"Given model{model.__class__.__name__} could not be found in transformers library i.e. {MODEL_FOR_CAUSAL_LM_MAPPING.values()}" # type: ignore
self.model: nn.Module = model
if transform:
self.transform()

def __repr__(self) -> str:
return self.model.__repr__()

@property
def is_transformed(self) -> bool:
return getattr(self.model, "qeff_transformed", False)
@@ -49,19 +53,22 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, *args, **kwargs):
:param transform:bool. Whether to optimize model for KV retention; default is True. Pass False to get BertStyle model.
"""
transform: bool = kwargs.get("transform", True)
kwargs.update({"use_cache": True}) # Always pass use_cache = True, to get KV values as output during ONNX export
kwargs.update({"attn_implementation" : "eager"}) # Always use eager mode for attention implementation

model = QEFFAutoModelToTransformersAutoModelMap[cls.__name__].from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
kwargs.update(
{"use_cache": True}
) # Always pass use_cache = True, to get KV values as output during ONNX export
kwargs.update({"attn_implementation": "eager"}) # Always use eager mode for attention implementation

model = QEFFAutoModelToTransformersAutoModelMap[cls.__name__].from_pretrained(
pretrained_model_name_or_path, *args, **kwargs
)
return cls(model, transform=transform)


def transform_export(self, *args, **kwargs) -> Any:
raise NotImplementedError("Reached too far!!")

def transform_export_compile(self, *args, **kwargs) -> Any:
raise NotImplementedError("Reached too far!!")

def transform(self):
# FIXME: break down transform into optimization passes i.e. HW specific optimization(RMSNorm), KV retention pass etc.
QEfficient.transform(self)
@@ -72,22 +79,23 @@ class QEFFAutoModelForCausalLM(QEFFTransformersBase):
"""
QEFF class for manipulating any causal language model from HuggingFace hub.
"""
def execute(self, *args, **kwargs): # type: ignore

def execute(self, *args, **kwargs): # type: ignore
raise NotImplementedError("Reached too far!!")

def export(self):
raise NotImplementedError("Reached too far!!")

def compile(self, *args, **kwargs) -> Any:
raise NotImplementedError("Reached too far!!")


class QEffAutoModel(QEFFTransformersBase):
def execute(self, *args, **kwargs): # type: ignore
def execute(self, *args, **kwargs): # type: ignore
raise NotImplementedError("Reached too far!!")

def export(self):
raise NotImplementedError("Reached too far!!")

def compile(self, *args, **kwargs) -> Any:
raise NotImplementedError("Reached too far!!")
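As the reformatted `from_pretrained` shows, the loader always injects `use_cache=True` and `attn_implementation="eager"` before delegating to the matching transformers Auto class, then wraps the result and (by default) applies the KV-retention transform. A usage sketch with a placeholder model id:

# Sketch of the loading path shown above; "gpt2" is a placeholder model card id.
from QEfficient.src import QEFFAutoModelForCausalLM

qeff_model = QEFFAutoModelForCausalLM.from_pretrained("gpt2")
print(qeff_model.is_transformed)   # True once QEfficient.transform(self) has run

# Per the docstring above, transform=False skips the KV transform and keeps the BertStyle model.
base_model = QEFFAutoModelForCausalLM.from_pretrained("gpt2", transform=False)
print(base_model.is_transformed)   # False: the qeff_transformed flag is never set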
15 changes: 8 additions & 7 deletions QEfficient/src/base.py
@@ -18,17 +18,17 @@
QEFFBaseModel
________________________________________________|________________________________________________________________
| |
| |
QEFFTransformersBase QEFFDiffusersBase
| |
____________|________________________________________________________ ________________ _________________|______________
_____ | | | | | |
_____ | | | | | |
| QEFFAutoModel QEFFAutoModelForCausalLM QEFFAWQModelForCausalLM ... ... ...
QEFFCommonLoader -| [Provides way to [Provides way to do 1-5 on [Supports 1-5 for
QEFFCommonLoader -| [Provides way to [Provides way to do 1-5 on [Supports 1-5 for
[Provides | do steps 1-5 on transformers.AutoModelForCausalLM] AWQ Models]
interface to |_____ transformers.AutoModel]
Load any of
These models
Load any of
These models
by automatically
detecting the type
of the model]
@@ -42,7 +42,7 @@
from typing import Any


#Defining placeholder ENUM for execute function
# Defining placeholder ENUM for execute function
class Runtime(Enum):
CPU_ORT = "CPU ONNX Runtime"
CPU_PT = "CPU PyTorch Runtime"
@@ -56,6 +56,7 @@ class QEFFBaseModel(ABC):
All the child classes must provide way to load, transform(optimize), export to ONNX etc. capabilities.
"""

def __init__(self) -> None:
super().__init__()
# Users can call generate or execute
@@ -96,4 +97,4 @@ def export(self, *args, **kwargs) -> Any:

@abstractmethod
def compile(self, *args, **kwargs) -> Any:
pass
pass