From a8dc6869b2a30f7adea4a0b6811997fb377754b0 Mon Sep 17 00:00:00 2001
From: Matthias Cremon
Date: Wed, 8 Jan 2025 14:51:27 -0800
Subject: [PATCH] Refactor CadenceQuantizer (#7540)

Summary:
The current class structure is hard to cleanly extend. This diff:
- Makes `CadenceQuantizer` a base class
- Creates a `CadenceDefaultQuantizer` that is exactly the same as the previous `CadenceQuantizer` class
- Removes the qconfig from the `CadenceQuantizer`, since it really belongs to the `CadenceAtenQuantizer` (it is defined per op)
- Makes both the default qconfig and the default quantizer list module level variables

Using this structure will make it much cleaner to add new quantizers in the future.

Reviewed By: zonglinpeng

Differential Revision: D67645196
---
 backends/cadence/aot/compiler.py            |  7 +-
 backends/cadence/aot/export_example.py      |  4 +-
 backends/cadence/aot/quantizer/quantizer.py | 67 ++++++++++++-------
 .../aot/tests/test_remove_ops_passes.py     |  4 +-
 4 files changed, 50 insertions(+), 32 deletions(-)

diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
index c19a4296f6..bd0a45227e 100644
--- a/backends/cadence/aot/compiler.py
+++ b/backends/cadence/aot/compiler.py
@@ -17,7 +17,10 @@
     print_memory_planning_info,
 )
 from executorch.backends.cadence.aot.quantizer.fusion_pass import QuantFusion
-from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
+from executorch.backends.cadence.aot.quantizer.quantizer import (
+    CadenceDefaultQuantizer,
+    CadenceQuantizer,
+)
 from executorch.backends.cadence.aot.utils import (
     get_default_memory_config,
     MemoryConfig,
@@ -136,7 +139,7 @@
 
     # Instantiate the quantizer to CadenceQuantizer if not supplied
     if not quantizer:
-        quantizer = CadenceQuantizer()
+        quantizer = CadenceDefaultQuantizer()
 
     # Get converted graph module
     converted_gm = convert_pt2(model, inputs, quantizer)
diff --git a/backends/cadence/aot/export_example.py b/backends/cadence/aot/export_example.py
index 4ba5bffc96..28a1a60a2a 100644
--- a/backends/cadence/aot/export_example.py
+++ b/backends/cadence/aot/export_example.py
@@ -20,7 +20,7 @@
     fuse_pt2,
 )
 
-from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
+from executorch.backends.cadence.aot.quantizer.quantizer import CadenceDefaultQuantizer
 from executorch.backends.cadence.runtime import runtime
 from executorch.backends.cadence.runtime.executor import BundledProgramManager
 from executorch.exir import ExecutorchProgramManager
@@ -74,7 +74,7 @@ def export_model(
     )
 
     # Instantiate the quantizer
-    quantizer = CadenceQuantizer(qconfig)
+    quantizer = CadenceDefaultQuantizer(qconfig)
 
     # Convert the model
     converted_model = convert_pt2(model, example_inputs, quantizer)
diff --git a/backends/cadence/aot/quantizer/quantizer.py b/backends/cadence/aot/quantizer/quantizer.py
index 73ca40c9aa..65979919ed 100644
--- a/backends/cadence/aot/quantizer/quantizer.py
+++ b/backends/cadence/aot/quantizer/quantizer.py
@@ -60,6 +60,13 @@
 
 bias_qspec: Optional[QuantizationSpec] = None
 
+_default_qconfig = QuantizationConfig(
+    act_qspec,
+    act_qspec,
+    wgt_qspec,
+    None,
+)
+
 
 class CadenceAtenQuantizer(Quantizer):
     def __init__(
@@ -140,31 +147,39 @@ def get_supported_operators(cls) -> List[OperatorConfig]:
         return []
 
 
+def get_cadence_default_quantizer_list_with_config(
+    quantization_config: QuantizationConfig,
+) -> List[Quantizer]:
+    return [
+        CadenceAtenQuantizer(AddmmPattern(), quantization_config),
+        CadenceAtenQuantizer(BmmPattern(), quantization_config),
+        CadenceAtenQuantizer(Conv1dPattern(), quantization_config),
+        CadenceAtenQuantizer(Conv2dPattern(), quantization_config),
+        CadenceAtenQuantizer(LayerNormPattern(), quantization_config),
+        CadenceAtenQuantizer(LinearPattern(), quantization_config),
+        CadenceAtenQuantizer(MatmulPattern(), quantization_config),
+        CadenceAtenQuantizer(ReluPattern0(), quantization_config),
+        CadenceAtenQuantizer(ReluPattern1(), quantization_config),
+    ]
+
+
 class CadenceQuantizer(ComposableQuantizer):
-    def __init__(
-        self, quantization_config: Optional[QuantizationConfig] = None
-    ) -> None:
-        static_qconfig = (
-            QuantizationConfig(
-                act_qspec,
-                act_qspec,
-                wgt_qspec,
-                None,
-            )
-            if not quantization_config
-            else quantization_config
-        )
+    """
+    Generic CadenceQuantizer. Although it can be used directly, it is typically a base
+    class for explicitly defined quantizers (like CadenceDefaultQuantizer).
+    """
 
-        super().__init__(
-            [
-                CadenceAtenQuantizer(AddmmPattern(), static_qconfig),
-                CadenceAtenQuantizer(BmmPattern(), static_qconfig),
-                CadenceAtenQuantizer(Conv1dPattern(), static_qconfig),
-                CadenceAtenQuantizer(Conv2dPattern(), static_qconfig),
-                CadenceAtenQuantizer(LayerNormPattern(), static_qconfig),
-                CadenceAtenQuantizer(LinearPattern(), static_qconfig),
-                CadenceAtenQuantizer(MatmulPattern(), static_qconfig),
-                CadenceAtenQuantizer(ReluPattern0(), static_qconfig),
-                CadenceAtenQuantizer(ReluPattern1(), static_qconfig),
-            ]
-        )
+    def __init__(self, quantizers: List[Quantizer]) -> None:
+        super().__init__(quantizers)
+
+
+class CadenceDefaultQuantizer(CadenceQuantizer):
+    """
+    Default quantizer for Cadence backend.
+    """
+
+    def __init__(self, qconfig: Optional[QuantizationConfig] = None) -> None:
+        if qconfig is None:
+            qconfig = _default_qconfig
+        quantizers = get_cadence_default_quantizer_list_with_config(qconfig)
+        super().__init__(quantizers)
diff --git a/backends/cadence/aot/tests/test_remove_ops_passes.py b/backends/cadence/aot/tests/test_remove_ops_passes.py
index 25a32a5f07..231096c3ab 100644
--- a/backends/cadence/aot/tests/test_remove_ops_passes.py
+++ b/backends/cadence/aot/tests/test_remove_ops_passes.py
@@ -12,7 +12,7 @@
 from executorch.backends.cadence.aot.compiler import export_to_edge
 
 from executorch.backends.cadence.aot.pass_utils import count_node
-from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
+from executorch.backends.cadence.aot.quantizer.quantizer import CadenceDefaultQuantizer
 from executorch.backends.cadence.aot.remove_ops import (
     RemoveAliasCopyOpPass,
     RemoveCloneOpPass,
@@ -465,7 +465,7 @@ def forward(self, x):
 
         # Run the standard quant/convert steps, but without fusing
         # this leaves two redundant quant/dequant pairs to test with
-        quantizer = CadenceQuantizer()
+        quantizer = CadenceDefaultQuantizer()
         model_exp = export_for_training(M(), (inp,)).module()
         prepared_model = prepare_pt2e(model_exp, quantizer)
         prepared_model(inp)
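
Illustrative note, not part of the committed patch: the summary says the new structure makes it cleaner to add quantizers. Below is a minimal sketch of what that could look like. Only CadenceQuantizer, CadenceAtenQuantizer, and get_cadence_default_quantizer_list_with_config come from this diff; the subclass name, its constructor signature, and the extra_pattern argument are hypothetical.

# Hypothetical sketch: a custom quantizer built on the refactored base class.
# The import path matches the module touched by this patch; everything named
# "MyCadenceQuantizer" or "extra_pattern" is an illustrative assumption.
from executorch.backends.cadence.aot.quantizer.quantizer import (
    CadenceAtenQuantizer,
    CadenceQuantizer,
    get_cadence_default_quantizer_list_with_config,
)


class MyCadenceQuantizer(CadenceQuantizer):
    """Hypothetical quantizer: the default op coverage plus one extra pattern."""

    def __init__(self, qconfig, extra_pattern) -> None:
        # Start from the module-level default quantizer list added in this diff,
        # then append one more per-op quantizer for the caller-supplied pattern.
        quantizers = get_cadence_default_quantizer_list_with_config(qconfig)
        quantizers.append(CadenceAtenQuantizer(extra_pattern, qconfig))
        super().__init__(quantizers)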