Lint & Format (quic#53)
* Lint & Format

- Added linting and formatting GitHub Actions workflows
- Formatted the entire codebase
- Fixed linter errors
- Removed `# noqa` comments by fixing the underlying issues

Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com>

* Split test config into multiple-lines

Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com>

* Fix external repo for workflow

Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com>

* Format newly added files

Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com>

---------

Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com>
irajagop authored Jul 3, 2024
1 parent 6846c8a commit e00978d
Showing 25 changed files with 393 additions and 239 deletions.
19 changes: 19 additions & 0 deletions .github/workflows/lint-format.yml
@@ -0,0 +1,19 @@
name: Lint & Format
on: [pull_request]
jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - run: pip3 install ruff
      - run: ruff check
        env:
          RUFF_OUTPUT_FORMAT: github
  format:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - run: pip3 install ruff
      - run: ruff format --check
        env:
          RUFF_OUTPUT_FORMAT: github
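The workflow above gates every pull request on `ruff check` and `ruff format --check`. The same gate can be reproduced locally before pushing; the small script below is a hypothetical convenience helper, not part of this PR.

# check_style.py -- hypothetical local helper mirroring the two CI jobs above (not part of this PR).
# It shells out to the same ruff commands the workflow runs on every pull request.
import subprocess
import sys


def main() -> int:
    for cmd in (["ruff", "check"], ["ruff", "format", "--check"]):
        result = subprocess.run(cmd)
        if result.returncode != 0:
            return result.returncode
    return 0


if __name__ == "__main__":
    sys.exit(main())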
18 changes: 14 additions & 4 deletions QEfficient/__init__.py
@@ -5,12 +5,22 @@
#
# -----------------------------------------------------------------------------

from QEfficient.compile.compile_helper import compile # noqa: F401
from QEfficient.compile.compile_helper import compile
from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv # noqa: F401
from QEfficient.src import QEffAutoModel, QEFFAutoModelForCausalLM, QEFFCommonLoader # noqa: F401
from QEfficient.transformers.transform import transform # noqa: F401
from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
from QEfficient.src import QEffAutoModel, QEFFAutoModelForCausalLM, QEFFCommonLoader
from QEfficient.transformers.transform import transform

# Users can use QEfficient.export for exporting models to ONNX
export = qualcomm_efficient_converter
__version__ = "0.0.1.dev0"

__all__ = [
"transform",
"export",
"compile",
"cloud_ai_100_exec_kv",
"QEffAutoModel",
"QEFFAutoModelForCausalLM",
"QEFFCommonLoader",
]
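With the explicit `__all__`, the package's public surface is `transform`, `export` (an alias for `qualcomm_efficient_converter`), `compile`, `cloud_ai_100_exec_kv`, and the three loader classes. The sketch below is illustrative only; the model id is a placeholder and the keyword arguments are assumptions lifted from signatures visible elsewhere in this diff.

# Hypothetical end-to-end sketch of the re-exported API; not a verified recipe.
import QEfficient

# Loader class re-exported from QEfficient.src (see QEfficient/src/__init__.py below).
qeff_model = QEfficient.QEFFAutoModelForCausalLM.from_pretrained("gpt2")  # placeholder model id

# `export` aliases qualcomm_efficient_converter; keyword names mirror the call in
# QEfficient/cloud/export.py, which returns a 2-tuple whose second element is the ONNX model path.
_, onnx_model_path = QEfficient.export(model_name="gpt2", kv=True, form_factor="cloud")

# `compile` keyword names other than ctx_len/mxfp6 (shown in compile_helper.py) are assumptions.
qpc_path = QEfficient.compile(onnx_path=onnx_model_path, num_cores=14, ctx_len=128, mxfp6=True)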
40 changes: 25 additions & 15 deletions QEfficient/cloud/export.py
@@ -19,7 +19,13 @@
ROOT_DIR = os.path.dirname(os.path.abspath(""))


def get_onnx_model_path(model_name: str, cache_dir: Optional[str] = None, tokenizer: Optional[Union[PreTrainedTokenizerFast, PreTrainedTokenizer]]=None, hf_token: Optional[str] = None, local_model_dir: Optional[str] = None):
def get_onnx_model_path(
model_name: str,
cache_dir: Optional[str] = None,
tokenizer: Optional[Union[PreTrainedTokenizerFast, PreTrainedTokenizer]] = None,
hf_token: Optional[str] = None,
local_model_dir: Optional[str] = None,
):
"""
exports the model to onnx if pre-exported file is not found and returns onnx_model_path
"""
@@ -33,19 +39,21 @@ def get_onnx_model_path(model_name: str, cache_dir: Optional[str] = None, tokeni
# Export to the Onnx
logger.info(f"Exporting Pytorch {model_name} model to ONNX...")
_, generated_onnx_model_path = qualcomm_efficient_converter(
model_name=model_name,
local_model_dir=local_model_dir,
tokenizer=tokenizer,
onnx_dir_path=onnx_dir_path,
kv=True,
form_factor="cloud",
hf_token=hf_token,
cache_dir=cache_dir
) # type: ignore
logger.info(f"Generated Onnx_path {generated_onnx_model_path} \nOnnx_model_path {onnx_model_path} \nand Onnx_dir_path is {onnx_dir_path}")
model_name=model_name,
local_model_dir=local_model_dir,
tokenizer=tokenizer,
onnx_dir_path=onnx_dir_path,
kv=True,
form_factor="cloud",
hf_token=hf_token,
cache_dir=cache_dir,
) # type: ignore
logger.info(
f"Generated Onnx_path {generated_onnx_model_path} \nOnnx_model_path {onnx_model_path} \nand Onnx_dir_path is {onnx_dir_path}"
)
assert (
generated_onnx_model_path == onnx_model_path
), f"ONNX files were generated at an unusual location, expected {onnx_model_path}, got {generated_onnx_model_path}"
generated_onnx_model_path == onnx_model_path
), f"ONNX files were generated at an unusual location, expected {onnx_model_path}, got {generated_onnx_model_path}"
return onnx_model_path


@@ -63,14 +71,16 @@ def main(
:hf_token: str. HuggingFace login token to access private repos.
:local_model_dir: str. Path to custom model weights and config files.
"""
cache_dir = check_and_assign_cache_dir(local_model_dir,cache_dir)
cache_dir = check_and_assign_cache_dir(local_model_dir, cache_dir)
get_onnx_model_path(model_name=model_name, cache_dir=cache_dir, hf_token=hf_token, local_model_dir=local_model_dir)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Export script.")
parser.add_argument("--model_name", "--model-name", required=True, help="HF Model card name/id")
parser.add_argument("--local-model-dir", "--local_model_dir", required=False, help="Path to custom model weights and config files")
parser.add_argument(
"--local-model-dir", "--local_model_dir", required=False, help="Path to custom model weights and config files"
)
parser.add_argument(
"--cache_dir",
"--cache-dir",
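For callers who want the ONNX artifact without going through the CLI in `main`, the reformatted `get_onnx_model_path` can be invoked directly. A minimal sketch, using only parameters visible in the signature above (values are placeholders):

# Minimal sketch of programmatic export; "gpt2" is a placeholder model card id.
from QEfficient.cloud.export import get_onnx_model_path

onnx_model_path = get_onnx_model_path(
    model_name="gpt2",
    cache_dir=None,   # optional override for the HF download cache
    hf_token=None,    # only needed for private model repos
)
print(onnx_model_path)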
15 changes: 11 additions & 4 deletions QEfficient/cloud/infer.py
@@ -30,7 +30,7 @@
def main(
model_name: str,
num_cores: int,
prompt: Optional[str] = None, # type: ignore
prompt: Optional[str] = None, # type: ignore
local_model_dir: Optional[str] = None,
prompts_txt_file_path: Optional[str] = None,
aic_enable_depth_first: bool = False,
@@ -51,9 +51,14 @@ def main(
num_cores, mos, batch_size, prompt_len, ctx_len, mxfp6, mxint8, device_group
)
prompt: List[str] = check_batch_size_and_num_prompts(prompt, prompts_txt_file_path, batch_size)
cache_dir = check_and_assign_cache_dir(local_model_dir,cache_dir)
cache_dir = check_and_assign_cache_dir(local_model_dir, cache_dir)

tokenizer = load_hf_tokenizer(pretrained_model_name_or_path=(local_model_dir if local_model_dir else model_name), cache_dir=cache_dir, hf_token=hf_token, local_model_dir=local_model_dir)
tokenizer = load_hf_tokenizer(
pretrained_model_name_or_path=(local_model_dir if local_model_dir else model_name),
cache_dir=cache_dir,
hf_token=hf_token,
local_model_dir=local_model_dir,
)

qpc_path_exists, qpc_dir_path = qpc_exists(model_name, qpc_base_dir_name)
# Handle qpc generation
@@ -104,7 +109,9 @@ def main(
description="Inference command, the model will be downloaded from HF, optmized, compiled, executed on Cloud AI 100"
)
parser.add_argument("--model-name", "--model_name", required=True, help="HF Model card name/id")
parser.add_argument("--local-model-dir", "--local_model_dir", required=False, help="Path to custom model weights and config files")
parser.add_argument(
"--local-model-dir", "--local_model_dir", required=False, help="Path to custom model weights and config files"
)
parser.add_argument(
"--cache-dir",
"--cache_dir",
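The reflowed `load_hf_tokenizer` call is the step that resolves the tokenizer either from `local_model_dir` or from the HF model card before any QPC handling. A standalone sketch of that step (the import path of `load_hf_tokenizer` is an assumption; this diff only shows the call site):

# Sketch of the tokenizer-loading step from QEfficient/cloud/infer.py; import path assumed.
from QEfficient.utils import load_hf_tokenizer

model_name = "gpt2"        # placeholder HF model card id
local_model_dir = None     # set to a directory path to load custom weights/config instead

tokenizer = load_hf_tokenizer(
    pretrained_model_name_or_path=(local_model_dir if local_model_dir else model_name),
    cache_dir=None,
    hf_token=None,
    local_model_dir=local_model_dir,
)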
2 changes: 1 addition & 1 deletion QEfficient/compile/compile_helper.py
@@ -109,7 +109,7 @@ def compile(
ctx_len: int = 128,
mxfp6: bool = True,
mxint8: bool = False,
**kwargs
**kwargs,
) -> str:
# Dynamically create the specializations JSON
"""
4 changes: 2 additions & 2 deletions QEfficient/exporter/export_utils.py
@@ -169,7 +169,7 @@ def fix_onnx_fp16(
ort_outputs: List[np.ndarray],
gen_models_path: str,
model_base_name: str,
pt_outputs: Dict[str, torch.Tensor]
pt_outputs: Dict[str, torch.Tensor],
) -> str:
finfo = np.finfo(np.float16)
fp16_max = finfo.max
@@ -218,7 +218,7 @@ def fix_onnx_fp16(
os.path.join(gen_models_path, f"{model_base_name}.onnx"),
os.path.join(gen_models_path, f"{model_base_name}.onnxweights.data"),
)

model_base_name += "_clipped_fp16"
onnx.save_model(
model,
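The `fix_onnx_fp16` hunks above are formatting-only, but they sit inside the FP16-clipping pass: values outside the `np.finfo(np.float16)` bounds are clamped before the model is re-saved with a `_clipped_fp16` suffix. A minimal illustration of the clamping idea, detached from any ONNX graph handling:

# Toy illustration of the value-clamping idea behind fix_onnx_fp16 (not the actual ONNX pass).
import numpy as np

finfo = np.finfo(np.float16)                      # fp16 max magnitude is 65504
weights = np.array([1.0e5, -9.0e4, 0.5], dtype=np.float32)
clipped = np.clip(weights, finfo.min, finfo.max)
print(clipped)                                    # values are now representable in float16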
6 changes: 4 additions & 2 deletions QEfficient/src/__init__.py
@@ -5,5 +5,7 @@
#
# -----------------------------------------------------------------------------

from QEfficient.src._transformers.auto import QEffAutoModel, QEFFAutoModelForCausalLM # noqa: F401
from QEfficient.src.common import QEFFCommonLoader # noqa: F401
from QEfficient.src._transformers.auto import QEffAutoModel, QEFFAutoModelForCausalLM
from QEfficient.src.common import QEFFCommonLoader

__all__ = ["QEffAutoModel", "QEFFAutoModelForCausalLM", "QEFFCommonLoader"]
44 changes: 26 additions & 18 deletions QEfficient/src/_transformers/auto.py
@@ -26,17 +26,21 @@ class QEFFTransformersBase(QEFFBaseModel):
"""
Parent class for models QEFF provides from transformers i.e. (AutoModel, AutoModelForCausalLM, AutoModelForAudioClassification etc.) from src/transformers/models/auto/modeling_auto.py file.
"""
def __init__(self, model: nn.Module, transform:bool = True) -> None:
assert (model.__class__ in MODEL_FOR_CAUSAL_LM_MAPPING.values() or
# FIXME: Use model architectures here instead of complete dictionary TransformersToQEffModulesDict
model.__class__ in TransformersToQEffModulesDict.values()), f"Given model{model.__class__.__name__} could not be found in transformers library i.e. {MODEL_FOR_CAUSAL_LM_MAPPING.values()}" # type: ignore

def __init__(self, model: nn.Module, transform: bool = True) -> None:
assert (
model.__class__ in MODEL_FOR_CAUSAL_LM_MAPPING.values()
or
# FIXME: Use model architectures here instead of complete dictionary TransformersToQEffModulesDict
model.__class__ in TransformersToQEffModulesDict.values()
), f"Given model{model.__class__.__name__} could not be found in transformers library i.e. {MODEL_FOR_CAUSAL_LM_MAPPING.values()}" # type: ignore
self.model: nn.Module = model
if transform:
self.transform()

def __repr__(self) -> str:
return self.model.__repr__()

@property
def is_transformed(self) -> bool:
return getattr(self.model, "qeff_transformed", False)
@@ -49,19 +53,22 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, *args, **kwargs):
:param transform:bool. Whether to optimize model for KV retention; default is True. Pass False to get BertStyle model.
"""
transform: bool = kwargs.get("transform", True)
kwargs.update({"use_cache": True}) # Always pass use_cache = True, to get KV values as output during ONNX export
kwargs.update({"attn_implementation" : "eager"}) # Always use eager mode for attention implementation

model = QEFFAutoModelToTransformersAutoModelMap[cls.__name__].from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
kwargs.update(
{"use_cache": True}
) # Always pass use_cache = True, to get KV values as output during ONNX export
kwargs.update({"attn_implementation": "eager"}) # Always use eager mode for attention implementation

model = QEFFAutoModelToTransformersAutoModelMap[cls.__name__].from_pretrained(
pretrained_model_name_or_path, *args, **kwargs
)
return cls(model, transform=transform)


def transform_export(self, *args, **kwargs) -> Any:
raise NotImplementedError("Reached too far!!")

def transform_export_compile(self, *args, **kwargs) -> Any:
raise NotImplementedError("Reached too far!!")

def transform(self):
# FIXME: break down transform into optimization passes i.e. HW specific optimization(RMSNorm), KV retention pass etc.
QEfficient.transform(self)
@@ -72,22 +79,23 @@ class QEFFAutoModelForCausalLM(QEFFTransformersBase):
"""
QEFF class for manipulating any causal language model from HuggingFace hub.
"""
def execute(self, *args, **kwargs): # type: ignore

def execute(self, *args, **kwargs): # type: ignore
raise NotImplementedError("Reached too far!!")

def export(self):
raise NotImplementedError("Reached too far!!")

def compile(self, *args, **kwargs) -> Any:
raise NotImplementedError("Reached too far!!")


class QEffAutoModel(QEFFTransformersBase):
def execute(self, *args, **kwargs): # type: ignore
def execute(self, *args, **kwargs): # type: ignore
raise NotImplementedError("Reached too far!!")

def export(self):
raise NotImplementedError("Reached too far!!")

def compile(self, *args, **kwargs) -> Any:
raise NotImplementedError("Reached too far!!")
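As the reformatted `from_pretrained` shows, the loader always injects `use_cache=True` and `attn_implementation="eager"` before delegating to the matching transformers Auto class, then wraps the result and (by default) applies the KV-retention transform. A usage sketch with a placeholder model id:

# Sketch of the loading path shown above; "gpt2" is a placeholder model card id.
from QEfficient.src import QEFFAutoModelForCausalLM

qeff_model = QEFFAutoModelForCausalLM.from_pretrained("gpt2")
print(qeff_model.is_transformed)   # True once QEfficient.transform(self) has run

# Per the docstring above, transform=False skips the KV transform and keeps the BertStyle model.
base_model = QEFFAutoModelForCausalLM.from_pretrained("gpt2", transform=False)
print(base_model.is_transformed)   # False: the qeff_transformed flag is never set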
15 changes: 8 additions & 7 deletions QEfficient/src/base.py
@@ -18,17 +18,17 @@
QEFFBaseModel
________________________________________________|________________________________________________________________
| |
| |
QEFFTransformersBase QEFFDiffusersBase
| |
____________|________________________________________________________ ________________ _________________|______________
_____ | | | | | |
_____ | | | | | |
| QEFFAutoModel QEFFAutoModelForCausalLM QEFFAWQModelForCausalLM ... ... ...
QEFFCommonLoader -| [Provides way to [Provides way to do 1-5 on [Supports 1-5 for
QEFFCommonLoader -| [Provides way to [Provides way to do 1-5 on [Supports 1-5 for
[Provides | do steps 1-5 on transformers.AutoModelForCausalLM] AWQ Models]
interface to |_____ transformers.AutoModel]
Load any of
These models
Load any of
These models
by automatically
detecting the type
of the model]
@@ -42,7 +42,7 @@
from typing import Any


#Defining placeholder ENUM for execute function
# Defining placeholder ENUM for execute function
class Runtime(Enum):
CPU_ORT = "CPU ONNX Runtime"
CPU_PT = "CPU PyTorch Runtime"
@@ -56,6 +56,7 @@ class QEFFBaseModel(ABC):
All the child classes must provide way to load, transform(optimize), export to ONNX etc. capabilities.
"""

def __init__(self) -> None:
super().__init__()
# Users can call generate or execute
@@ -96,4 +97,4 @@ def export(self, *args, **kwargs) -> Any:

@abstractmethod
def compile(self, *args, **kwargs) -> Any:
pass
pass