From a8dc6869b2a30f7adea4a0b6811997fb377754b0 Mon Sep 17 00:00:00 2001
From: Matthias Cremon
Date: Wed, 8 Jan 2025 14:51:27 -0800
Subject: [PATCH] Refactor CadenceQuantizer (#7540)

Summary:
The current class structure is hard to cleanly extend. This diff:
- Makes `CadenceQuantizer` a base class
- Creates a `CadenceDefaultQuantizer` that is exactly the same as the previous `CadenceQuantizer` class
- Removes the qconfig from the `CadenceQuantizer`, since it really belongs to the `CadenceAtenQuantizer` (it is defined per op)
- Makes both the default qconfig and the default quantizer list module level variables

Using this structure will make it much cleaner to add new quantizers in the future.

Reviewed By: zonglinpeng

Differential Revision: D67645196
---
 backends/cadence/aot/compiler.py            |  7 +-
 backends/cadence/aot/export_example.py      |  4 +-
 backends/cadence/aot/quantizer/quantizer.py | 67 ++++++++++++-------
 .../aot/tests/test_remove_ops_passes.py     |  4 +-
 4 files changed, 50 insertions(+), 32 deletions(-)

diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
index c19a4296f6..bd0a45227e 100644
--- a/backends/cadence/aot/compiler.py
+++ b/backends/cadence/aot/compiler.py
@@ -17,7 +17,10 @@
     print_memory_planning_info,
 )
 from executorch.backends.cadence.aot.quantizer.fusion_pass import QuantFusion
-from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
+from executorch.backends.cadence.aot.quantizer.quantizer import (
+    CadenceDefaultQuantizer,
+    CadenceQuantizer,
+)
 from executorch.backends.cadence.aot.utils import (
     get_default_memory_config,
     MemoryConfig,
@@ -136,7 +139,7 @@
 
     # Instantiate the quantizer to CadenceQuantizer if not supplied
     if not quantizer:
-        quantizer = CadenceQuantizer()
+        quantizer = CadenceDefaultQuantizer()
 
     # Get converted graph module
     converted_gm = convert_pt2(model, inputs, quantizer)
diff --git a/backends/cadence/aot/export_example.py b/backends/cadence/aot/export_example.py
index 4ba5bffc96..28a1a60a2a 100644
--- a/backends/cadence/aot/export_example.py
+++ b/backends/cadence/aot/export_example.py
@@ -20,7 +20,7 @@
     fuse_pt2,
 )
 
-from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
+from executorch.backends.cadence.aot.quantizer.quantizer import CadenceDefaultQuantizer
 from executorch.backends.cadence.runtime import runtime
 from executorch.backends.cadence.runtime.executor import BundledProgramManager
 from executorch.exir import ExecutorchProgramManager
@@ -74,7 +74,7 @@ def export_model(
     )
 
     # Instantiate the quantizer
-    quantizer = CadenceQuantizer(qconfig)
+    quantizer = CadenceDefaultQuantizer(qconfig)
 
     # Convert the model
     converted_model = convert_pt2(model, example_inputs, quantizer)
diff --git a/backends/cadence/aot/quantizer/quantizer.py b/backends/cadence/aot/quantizer/quantizer.py
index 73ca40c9aa..65979919ed 100644
--- a/backends/cadence/aot/quantizer/quantizer.py
+++ b/backends/cadence/aot/quantizer/quantizer.py
@@ -60,6 +60,13 @@
 
 bias_qspec: Optional[QuantizationSpec] = None
 
+_default_qconfig = QuantizationConfig(
+    act_qspec,
+    act_qspec,
+    wgt_qspec,
+    None,
+)
+
 
 class CadenceAtenQuantizer(Quantizer):
     def __init__(
@@ -140,31 +147,39 @@ def get_supported_operators(cls) -> List[OperatorConfig]:
         return []
 
 
+def get_cadence_default_quantizer_list_with_config(
+    quantization_config: QuantizationConfig,
+) -> List[Quantizer]:
+    return [
+        CadenceAtenQuantizer(AddmmPattern(), quantization_config),
+        CadenceAtenQuantizer(BmmPattern(), quantization_config),
+        CadenceAtenQuantizer(Conv1dPattern(), quantization_config),
+        CadenceAtenQuantizer(Conv2dPattern(), quantization_config),
+        CadenceAtenQuantizer(LayerNormPattern(), quantization_config),
+        CadenceAtenQuantizer(LinearPattern(), quantization_config),
+        CadenceAtenQuantizer(MatmulPattern(), quantization_config),
+        CadenceAtenQuantizer(ReluPattern0(), quantization_config),
+        CadenceAtenQuantizer(ReluPattern1(), quantization_config),
+    ]
+
+
 class CadenceQuantizer(ComposableQuantizer):
-    def __init__(
-        self, quantization_config: Optional[QuantizationConfig] = None
-    ) -> None:
-        static_qconfig = (
-            QuantizationConfig(
-                act_qspec,
-                act_qspec,
-                wgt_qspec,
-                None,
-            )
-            if not quantization_config
-            else quantization_config
-        )
+    """
+    Generic CadenceQuantizer. Although it can be used directly, it is typically a base
+    class for explicitly defined quantizers (like CadenceDefaultQuantizer).
+    """
 
-        super().__init__(
-            [
-                CadenceAtenQuantizer(AddmmPattern(), static_qconfig),
-                CadenceAtenQuantizer(BmmPattern(), static_qconfig),
-                CadenceAtenQuantizer(Conv1dPattern(), static_qconfig),
-                CadenceAtenQuantizer(Conv2dPattern(), static_qconfig),
-                CadenceAtenQuantizer(LayerNormPattern(), static_qconfig),
-                CadenceAtenQuantizer(LinearPattern(), static_qconfig),
-                CadenceAtenQuantizer(MatmulPattern(), static_qconfig),
-                CadenceAtenQuantizer(ReluPattern0(), static_qconfig),
-                CadenceAtenQuantizer(ReluPattern1(), static_qconfig),
-            ]
-        )
+    def __init__(self, quantizers: List[Quantizer]) -> None:
+        super().__init__(quantizers)
+
+
+class CadenceDefaultQuantizer(CadenceQuantizer):
+    """
+    Default quantizer for Cadence backend.
+    """
+
+    def __init__(self, qconfig: Optional[QuantizationConfig] = None) -> None:
+        if qconfig is None:
+            qconfig = _default_qconfig
+        quantizers = get_cadence_default_quantizer_list_with_config(qconfig)
+        super().__init__(quantizers)
diff --git a/backends/cadence/aot/tests/test_remove_ops_passes.py b/backends/cadence/aot/tests/test_remove_ops_passes.py
index 25a32a5f07..231096c3ab 100644
--- a/backends/cadence/aot/tests/test_remove_ops_passes.py
+++ b/backends/cadence/aot/tests/test_remove_ops_passes.py
@@ -12,7 +12,7 @@
 from executorch.backends.cadence.aot.compiler import export_to_edge
 
 from executorch.backends.cadence.aot.pass_utils import count_node
-from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
+from executorch.backends.cadence.aot.quantizer.quantizer import CadenceDefaultQuantizer
 from executorch.backends.cadence.aot.remove_ops import (
     RemoveAliasCopyOpPass,
     RemoveCloneOpPass,
@@ -465,7 +465,7 @@ def forward(self, x):
 
         # Run the standard quant/convert steps, but without fusing
         # this leaves two redundant quant/dequant pairs to test with
-        quantizer = CadenceQuantizer()
+        quantizer = CadenceDefaultQuantizer()
         model_exp = export_for_training(M(), (inp,)).module()
         prepared_model = prepare_pt2e(model_exp, quantizer)
         prepared_model(inp)
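
Illustrative note, not part of the committed patch: the summary says the new structure makes it cleaner to add quantizers. Below is a minimal sketch of what that could look like. Only CadenceQuantizer, CadenceAtenQuantizer, and get_cadence_default_quantizer_list_with_config come from this diff; the subclass name, its constructor signature, and the extra_pattern argument are hypothetical.

# Hypothetical sketch: a custom quantizer built on the refactored base class.
# The import path matches the module touched by this patch; everything named
# "MyCadenceQuantizer" or "extra_pattern" is an illustrative assumption.
from executorch.backends.cadence.aot.quantizer.quantizer import (
    CadenceAtenQuantizer,
    CadenceQuantizer,
    get_cadence_default_quantizer_list_with_config,
)


class MyCadenceQuantizer(CadenceQuantizer):
    """Hypothetical quantizer: the default op coverage plus one extra pattern."""

    def __init__(self, qconfig, extra_pattern) -> None:
        # Start from the module-level default quantizer list added in this diff,
        # then append one more per-op quantizer for the caller-supplied pattern.
        quantizers = get_cadence_default_quantizer_list_with_config(qconfig)
        quantizers.append(CadenceAtenQuantizer(extra_pattern, qconfig))
        super().__init__(quantizers)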