Skip to content

Commit

Permalink
Merge branch 'pytorch:main' into change-969691
Browse files Browse the repository at this point in the history
  • Loading branch information
AdrianLundell authored Jan 17, 2025
2 parents b58a26d + eaad7ff commit 9bb1b79
Show file tree
Hide file tree
Showing 65 changed files with 1,789 additions and 1,103 deletions.
1 change: 1 addition & 0 deletions .github/pytorch-probot.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# The schema is from https://github.com/pytorch/pytorch/blob/main/.github/pytorch-probot.yml
tracking_issue: 7679
ciflow_push_tags:
- ciflow/android
- ciflow/apple
Expand Down
19 changes: 19 additions & 0 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,25 @@ jobs:
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
test-qnn-models-linux:
name: test-qnn-models-linux
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
strategy:
fail-fast: false
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-qnn-sdk
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 180
script: |
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
# Placeholder for running test_qnn_delegate.py; a matrix can be used to trigger different jobs (see test-llama-runner-qnn-linux for reference)
# reminder: make sure each job runs fast
test-phi-3-mini-runner-linux:
name: test-phi-3-mini-runner-linux
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
Expand Down
1 change: 1 addition & 0 deletions .lintrunner.toml
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ include_patterns = [
'build/**/*.py',
'codegen/**/*.py',
# 'devtools/**/*.py',
'devtools/visualization/**/*.py',
'docs/**/*.py',
# 'examples/**/*.py',
# 'exir/**/*.py',
Expand Down
115 changes: 63 additions & 52 deletions backends/arm/_passes/arm_pass_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

# pyre-unsafe

import torch
from executorch.backends.arm._passes.annotate_channels_last_dim_order_pass import (
AnnotateChannelsLastDimOrder,
)
Expand Down Expand Up @@ -47,7 +46,7 @@
)
from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
from executorch.backends.arm._passes.meandim_to_averagepool_pass import (
ConvertMeanDimToAveragePool,
ConvertMeanDimToAveragePoolPass,
)
from executorch.backends.arm._passes.mm_to_bmm_pass import ConvertMmToBmmPass
from executorch.backends.arm._passes.remove_clone_pass import RemoveClonePass
Expand All @@ -61,86 +60,98 @@
from executorch.backends.arm._passes.unsqueeze_scalar_placeholders_pass import (
UnsqueezeScalarPlaceholdersPass,
)
from executorch.backends.arm.tosa_specification import TosaSpecification
from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
from executorch.exir import ExportedProgram
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_manager import PassManager
from torch.fx import GraphModule


class ArmPassManager(PassManager):

def _transform(self, graph_module: torch.fx.GraphModule):
def __init__(self, tosa_spec: TosaSpecification) -> None:
self.tosa_spec = tosa_spec
super().__init__()

def _transform(self, graph_module: GraphModule):
return self(graph_module).graph_module

def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
"""Apply passes before transforming program to backend"""
def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
self.add_pass(FuseQuantizedActivationPass())
self.add_pass(RemoveGetItemPass())
self.add_pass(ConvertSplitToSlicePass())
self.add_pass(ConvertMmToBmmPass())
self.add_pass(DecomposeLinearPass())
self.add_pass(ConvertMeanDimToAveragePoolPass())

self.add_pass(AnnotateDecomposedMatmulPass())
self.add_pass(QuantizeFullArgument())
self.add_pass(FoldAndAnnotateQParamsPass())
self.add_pass(RetraceFoldedDtypesPass())
self.add_pass(InsertTableOpsPass(exported_program))

self.add_pass(RemoveClonePass())
self.add_pass(SizeAdjustConv2DPass())
self.add_pass(ConvertExpandCopyToRepeatPass())
self.add_pass(UnsqueezeBeforeRepeatPass())
self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
self.add_pass(CastInt64ToInt32Pass(exported_program))
self.add_pass(MatchArgRanksPass(exported_program))
self.add_pass(KeepDimsFalseToSqueezePass())
self.add_pass(Conv1dUnsqueezePass(exported_program))
self.add_pass(DecomposeSelectPass())

self.add_pass(AnnotateChannelsLastDimOrder())

return self._transform(exported_program.graph_module)

def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:

self.add_pass(FuseQuantizedActivationPass())
self.add_pass(RemoveGetItemPass())
self.add_pass(ConvertSplitToSlicePass())
self.add_pass(ConvertMmToBmmPass())
self.add_pass(DecomposeLinearPass())
self.add_pass(DecomposeLayerNormPass())
self.add_pass(DecomposeVarPass())
self.add_pass(ConvertMeanDimToAveragePool())
self.add_pass(DecomposeMeanDimPass())
self.add_pass(ConvertSplitToSlicePass())
self.add_pass(ConvertMmToBmmPass())
# TODO MLETORCH-558
self.add_pass(ConvertMeanDimToAveragePoolPass())
self.add_pass(DecomposeDivPass())
self.add_pass(DecomposeSoftmaxesPass())

self.add_pass(AnnotateDecomposedMatmulPass())
self.add_pass(QuantizeFullArgument())
self.add_pass(
FoldAndAnnotateQParamsPass(
[
exir_ops.edge.aten.minimum.default,
exir_ops.edge.aten.maximum.default,
exir_ops.edge.aten.add.Tensor,
exir_ops.edge.aten.avg_pool2d.default,
exir_ops.edge.aten.bmm.default,
exir_ops.edge.aten.cat.default,
exir_ops.edge.aten.convolution.default,
exir_ops.edge.aten.clone.default,
exir_ops.edge.aten.exp.default,
exir_ops.edge.aten.expand_copy.default,
exir_ops.edge.aten.full.default,
exir_ops.edge.aten.hardtanh.default,
exir_ops.edge.aten.log.default,
exir_ops.edge.aten.max_pool2d.default,
exir_ops.edge.aten.mul.Tensor,
exir_ops.edge.aten.permute_copy.default,
exir_ops.edge.aten.reciprocal.default,
exir_ops.edge.aten.relu.default,
exir_ops.edge.aten.repeat.default,
exir_ops.edge.aten.rsqrt.default,
exir_ops.edge.aten.select_copy.int,
exir_ops.edge.aten.sigmoid.default,
exir_ops.edge.aten.slice_copy.Tensor,
exir_ops.edge.aten.squeeze_copy.dims,
exir_ops.edge.aten.sub.Tensor,
exir_ops.edge.aten.sum.dim_IntList,
exir_ops.edge.aten.tanh.default,
exir_ops.edge.aten.unsqueeze_copy.default,
exir_ops.edge.aten.upsample_nearest2d.vec,
exir_ops.edge.aten.view_copy.default,
]
)
)
self.add_pass(FoldAndAnnotateQParamsPass())
self.add_pass(RetraceFoldedDtypesPass())
self.add_pass(InsertTableOpsPass(exported_program))

self.add_pass(RemoveClonePass())
self.add_pass(SizeAdjustConv2DPass())
self.add_pass(ConvertExpandCopyToRepeatPass())
self.add_pass(UnsqueezeBeforeRepeatPass())
self.add_pass(CastInt64ToInt32Pass(exported_program))
self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
self.add_pass(SizeAdjustConv2DPass())
self.add_pass(RemoveClonePass())
self.add_pass(CastInt64ToInt32Pass(exported_program))
self.add_pass(MatchArgRanksPass(exported_program))
self.add_pass(DecomposeDivPass())
self.add_pass(KeepDimsFalseToSqueezePass())
self.add_pass(Conv1dUnsqueezePass(exported_program))
self.add_pass(DecomposeSoftmaxesPass())
self.add_pass(DecomposeSelectPass())

self.add_pass(AnnotateChannelsLastDimOrder())

return self._transform(exported_program.graph_module)

def transform_for_annotation_pipeline(self, graph_module: torch.fx.GraphModule):
def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
"""Apply passes before transforming program to backend"""
if self.tosa_spec == TosaSpecification.create_from_string("TOSA-0.80.0+BI"):
return self._tosa_080_BI_pipeline(exported_program)
elif self.tosa_spec == TosaSpecification.create_from_string("TOSA-0.80.0+MI"):
return self._tosa_080_MI_pipeline(exported_program)
else:
raise NotImplementedError(
f"No pass pipeline implemented for {self.tosa_spec=}"
)

def transform_for_annotation_pipeline(self, graph_module: GraphModule):
self.add_pass(ScalarsToAttributePass())
self.add_pass(DecomposeLayerNormPass())
self.add_pass(DecomposeVarPass())
Expand Down
6 changes: 5 additions & 1 deletion backends/arm/_passes/cast_int64_pass.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2024 Arm Limited and/or its affiliates.
# Copyright 2024-2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
Expand All @@ -17,6 +17,10 @@


class CastInt64ToInt32Pass(ExportPass):
"""
Cast int64 buffers to int32 if the int64 data is in int32 range.
"""

def __init__(self, exported_program: torch.export.ExportedProgram):
super(CastInt64ToInt32Pass, self).__init__()
self.exported_program = exported_program
Expand Down
13 changes: 6 additions & 7 deletions backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Copyright 2024 Arm Limited and/or its affiliates.
# Copyright 2024-2025 Arm Limited and/or its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import copy

from typing import cast, Dict, Iterable, Set, Tuple
from typing import cast, Dict, Set, Tuple

from executorch.backends.arm.tosa_quant_utils import QuantArgs

Expand Down Expand Up @@ -55,7 +55,7 @@ def get_output_qparams(node: Node) -> dict[int, QuantArgs]:
class FoldAndAnnotateQParamsPass(ExportPass):
"""
A pass that walks the graph and removes any DQ and Q nodes before and after the target
node in the supplied list of operators.
node.
The quantization parameters from the DQ/Q nodes are stored as meta values to be
accessible for later lowering and serialization passes.
The assumption is that the quantization annotation adds DQ nodes for all tensor
Expand All @@ -82,9 +82,8 @@ class FoldAndAnnotateQParamsPass(ExportPass):
"""

def __init__(self, targeted_ops: Iterable[EdgeOpOverload]) -> None:
def __init__(self) -> None:
super().__init__()
self.targeted_ops = targeted_ops

def fold_and_annotate_arg(
self, graph_module: GraphModule, node: Node, arg_list: list[Node], i: int
Expand Down Expand Up @@ -131,7 +130,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
# Loop over the graph nodes and process every call_function node.
for n in graph_module.graph.nodes:
n = cast(Node, n)
if n.op != "call_function" or n.target not in self.targeted_ops:
if n.op != "call_function":
continue

# Make sure we haven't already set qparams meta information on the node
Expand Down Expand Up @@ -180,7 +179,7 @@ class QuantizeFullArgument(ExportPass):

def call(self, graph_module: GraphModule) -> PassResult:
modified = False
# Loop over the graph nodes and find any node in the 'targeted_ops' list.
# Loop over the graph nodes and find full.default nodes.
for n in graph_module.graph.nodes:
n = cast(Node, n)
if n.target != exir_ops.edge.aten.full.default:
Expand Down
7 changes: 4 additions & 3 deletions backends/arm/_passes/fuse_quantized_activation_pass.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@ def _is_fuseable_quantized_activation(self, node: Node):
is_fuseable = min_val == 0

is_quantized = len(node.users) == 1 and next(iter(node.users)).target == q_op
if is_quantized:
if is_fuseable and is_quantized:
quant_node = next(iter(node.users))
zp = quant_node.args[2]
qmin = quant_node.args[3]

return is_fuseable and is_quantized and zp == qmin
return zp == qmin
else:
return False

def _is_fuseable_input(self, node: Node):
return (
Expand Down
4 changes: 2 additions & 2 deletions backends/arm/_passes/meandim_to_averagepool_pass.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2024 Arm Limited and/or its affiliates.
# Copyright 2024-2025 Arm Limited and/or its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
Expand All @@ -16,7 +16,7 @@
Argument = Any


class ConvertMeanDimToAveragePool(ExportPass):
class ConvertMeanDimToAveragePoolPass(ExportPass):
"""
Replace a mean operation with dim = [-1, -2] and keep_dim = True with an average pool operation.
"""
Expand Down
3 changes: 2 additions & 1 deletion backends/arm/_passes/remove_clone_pass.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2024 Arm Limited and/or its affiliates.
# Copyright 2024-2025 Arm Limited and/or its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
Expand All @@ -11,6 +11,7 @@


class RemoveClonePass(ExportPass):
"""Remove all clones from graph_module"""

def call_operator(self, op, args, kwargs, meta):
if op != exir_ops.edge.aten.clone.default:
Expand Down
Loading

0 comments on commit 9bb1b79

Please sign in to comment.