diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
index bc854337a8c..5ddcfcf414b 100644
--- a/backends/cadence/aot/compiler.py
+++ b/backends/cadence/aot/compiler.py
@@ -18,17 +18,11 @@
 )
 from executorch.backends.cadence.aot.quantizer.fusion_pass import QuantFusion
 from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
-
-from executorch.backends.cadence.aot.replace_ops import ReplaceSafeSoftmaxWithSoftmax
 from executorch.backends.cadence.aot.utils import (
     get_default_memory_config,
     MemoryConfig,
-    model_gm_has_SDPA,
     model_is_quantized,
 )
-from executorch.backends.transforms.decompose_sdpa import (
-    DecomposeScaledDotProductAttention,
-)
 from executorch.devtools import generate_etrecord
 from executorch.exir import (
     EdgeCompileConfig,
@@ -91,16 +85,6 @@ def convert_pt2(
         .module()
     )
 
-    if model_gm_has_SDPA(model_gm):
-        # Decompose SDPA
-        DecomposeScaledDotProductAttention(False)(model_gm)
-
-        # Swap _safe_softmax with _softmax (see https://github.com/pytorch/pytorch/pull/133882
-        # for details).
-        result = ReplaceSafeSoftmaxWithSoftmax()(model_gm)
-        assert result is not None
-        model_gm = result.graph_module
-
     # Prepare
     prepared_model = prepare_pt2e(model_gm, quantizer)
 
diff --git a/backends/cadence/aot/utils.py b/backends/cadence/aot/utils.py
index 3b2084e3d6d..35c5d94d77c 100644
--- a/backends/cadence/aot/utils.py
+++ b/backends/cadence/aot/utils.py
@@ -235,14 +235,6 @@ def print_ops_info(
     )
 
 
-def model_gm_has_SDPA(model_gm: torch.fx.GraphModule) -> bool:
-    for node in model_gm.graph.nodes:
-        if node.op == "call_function":
-            if node.target == torch.ops.aten.scaled_dot_product_attention.default:
-                return True
-    return False
-
-
 def save_pte_program(
     prog: ExecutorchProgramManager, model_name: str, output_dir: str = ""
 ) -> None: