2024-12-13 nightly release (3f7eb3b)
pytorchbot committed Dec 13, 2024
1 parent a2cef91 commit 717584c
Showing 43 changed files with 689 additions and 209 deletions.
3 changes: 0 additions & 3 deletions .ci/docker/ubuntu/Dockerfile
@@ -79,9 +79,6 @@ RUN if [ -n "${ANDROID_NDK_VERSION}" ]; then bash ./install_android.sh; fi
RUN rm install_android.sh

ARG ARM_SDK
COPY --chown=ci-user:ci-user ./arm /opt/arm
# Set up ARM SDK if needed
RUN if [ -n "${ARM_SDK}" ]; then git config --global user.email "ossci@example.com"; git config --global user.name "OSS CI"; bash /opt/arm/setup.sh --i-agree-to-the-contained-eula /opt/arm-sdk; chown -R ci-user:ci-user /opt/arm-sdk; fi

ARG QNN_SDK

3 changes: 2 additions & 1 deletion .ci/scripts/build_llama_android.sh
@@ -12,7 +12,8 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

install_executorch_and_backend_lib() {
echo "Installing executorch and xnnpack backend"
rm -rf cmake-android-out && mkdir cmake-android-out
clean_executorch_install_folders
mkdir cmake-android-out
ANDROID_NDK=/opt/ndk
BUCK2=buck2
ANDROID_ABI=arm64-v8a
9 changes: 7 additions & 2 deletions .ci/scripts/utils.sh
@@ -16,6 +16,10 @@ retry () {
"$@" || (sleep 30 && reset_buck && "$@") || (sleep 60 && reset_buck && "$@")
}

clean_executorch_install_folders() {
./install_requirements.sh --clean
}

install_executorch() {
which pip
# Install executorch, this assumes that Executorch is checked out in the
@@ -74,7 +78,8 @@ build_executorch_runner_buck2() {
build_executorch_runner_cmake() {
CMAKE_OUTPUT_DIR=cmake-out
# Build executorch runtime using cmake
rm -rf "${CMAKE_OUTPUT_DIR}" && mkdir "${CMAKE_OUTPUT_DIR}"
clean_executorch_install_folders
mkdir "${CMAKE_OUTPUT_DIR}"

pushd "${CMAKE_OUTPUT_DIR}" || return
# This command uses buck2 to gather source files and buck2 could crash flakily
@@ -103,7 +108,7 @@ build_executorch_runner() {

cmake_install_executorch_lib() {
echo "Installing libexecutorch.a and libportable_kernels.a"
rm -rf cmake-out
clean_executorch_install_folders
retry cmake -DBUCK2="$BUCK" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
4 changes: 0 additions & 4 deletions .github/workflows/docker-builds.yml
@@ -7,8 +7,6 @@ on:
- .ci/docker/**
- .github/workflows/docker-builds.yml
- requirements-lintrunner.txt
- examples/arm/setup.sh
- examples/arm/ethos-u-setup/**
push:
branches:
- main
@@ -17,8 +15,6 @@ on:
- .ci/docker/**
- .github/workflows/docker-builds.yml
- requirements-lintrunner.txt
- examples/arm/setup.sh
- examples/arm/ethos-u-setup/**
schedule:
- cron: 1 3 * * 3

154 changes: 117 additions & 37 deletions backends/arm/_passes/annotate_channels_last_dim_order_pass.py
@@ -12,6 +12,7 @@
from executorch.backends.arm._passes.arm_pass_utils import (
create_node,
get_first_fake_tensor,
get_node_arg,
insert_q_dq_pair,
)
from executorch.backends.arm.tosa_quant_utils import dq_op, q_op, register_passable_op
@@ -83,14 +84,48 @@ def is_weight_node_for_depthwise_conv2d(self, node: torch.fx.Node):

return False

def insert_input_transpose(self, node, input_node, graph_module):
@staticmethod
def memory_format_differs(shape):
"""Returns true if the shape will have a different memory layout in NCHW and NHWC format"""
if len(shape) >= 4:
C = shape[1]
H = shape[2]
W = shape[3]
elif len(shape) == 3:
C = shape[0]
H = shape[1]
W = shape[2]
if len(shape) <= 2:
return False

return C > 1 and (H > 1 or W > 1)

@staticmethod
def is_channel_reshape(input_shape, output_shape):
"""Returns true if the reshape changes the channel dimension"""
if not len(input_shape) == len(output_shape) == 4:
return False

C_old = input_shape[1]
C_new = output_shape[1]

N_new = output_shape[0]
N_old = input_shape[0]

return (N_old != N_new) or (C_old != C_new)

@staticmethod
def insert_input_transpose(node, input_node, graph_module):
quantize = input_node.target == dq_op
q_params = input_node.args[1:] if quantize else None
with graph_module.graph.inserting_before(node):
permute_node = create_node(
graph_module.graph,
torch.ops.passthrough_to_tosa._transpose,
args=(input_node, list(self.NHWC_inverse_order)),
args=(
input_node,
list(AnnotateChannelsLastDimOrder.NHWC_inverse_order),
),
quantize=quantize,
q_params=q_params,
)
@@ -100,14 +135,17 @@ def insert_input_transpose(self, node, input_node, graph_module):
range(len(input_node.meta["val"].size()))
)

def insert_output_transpose(self, node, graph_module):
@staticmethod
def insert_output_transpose(node, graph_module):
with graph_module.graph.inserting_after(node):
permute_node = create_node(
graph_module.graph,
torch.ops.passthrough_to_tosa._transpose,
args=(node, list(self.NHWC_order)),
args=(node, list(AnnotateChannelsLastDimOrder.NHWC_order)),
)
permute_node.meta["tosa_dim_order"] = (
AnnotateChannelsLastDimOrder.NHWC_order
)
permute_node.meta["tosa_dim_order"] = self.NHWC_order
node.meta["tosa_dim_order"] = (0, 1, 2, 3)
users = [user for user in node.users if user != permute_node]
for user in users:
@@ -118,54 +156,96 @@ def insert_output_transpose(self, node, graph_module):
q_params = node.args[0].args[1:]
insert_q_dq_pair(graph_module.graph, node, q_params)

@staticmethod
def _insert_squeeze_transpose(
input_shape, output_shape, node, input_node, graph_module
):
nhwc_to_nhwc = len(input_shape) == 4 and len(output_shape) <= 3

if nhwc_to_nhwc and AnnotateChannelsLastDimOrder.memory_format_differs(
input_shape
):
AnnotateChannelsLastDimOrder.insert_input_transpose(
node, input_node, graph_module
)

@staticmethod
def _insert_unsqueeze_transpose(input_shape, output_shape, node, graph_module):
nchw_to_nhwc = len(input_shape) == 3 and len(output_shape) == 4
if nchw_to_nhwc and AnnotateChannelsLastDimOrder.memory_format_differs(
output_shape
):
AnnotateChannelsLastDimOrder.insert_output_transpose(node, graph_module)

@staticmethod
def _insert_view_transpose(
input_shape, output_shape, node, input_node, graph_module
):
nchw_to_nhwc = len(input_shape) < 4 and len(output_shape) == 4
nhwc_to_nchw = len(input_shape) == 4 and len(output_shape) < 4
channel_reshape = AnnotateChannelsLastDimOrder.is_channel_reshape(
output_shape, input_shape
)

if (
channel_reshape or nhwc_to_nchw
) and AnnotateChannelsLastDimOrder.memory_format_differs(input_shape):
AnnotateChannelsLastDimOrder.insert_input_transpose(
node, input_node, graph_module
)
if (
channel_reshape or nchw_to_nhwc
) and AnnotateChannelsLastDimOrder.memory_format_differs(output_shape):
AnnotateChannelsLastDimOrder.insert_output_transpose(node, graph_module)

def insert_tosa_transposes(self, graph_module: torch.fx.GraphModule):
"""
Reshape operations are not equivalent in NCHW and NHWC.
To get around this, transposes need to be added if the previous or new shape
fulfil the following condition:
C > 1 and (H or W > 1)
This is relevant for the following operations;
squeeze: 4D -> 3D
unsqueeze: <4D -> 4D
view: <4D -> 4D
view: 4D -> <4D
view: 4D -> 4D
"""

def transpose_condition(shape):
if len(shape) != 4:
return False
C = shape[1]
H = shape[2]
W = shape[3]
return C > 1 and (H > 1 or W > 1)
Transposes are needed for operators transforming the input to a different rank, as 4D tensors are assumed to be in NHWC format, whereas all others are in NCHW format.
This is relevant for the following cases:
- squeeze: 4D -> <4D
- unsqueeze: 3D -> 4D
- view: <4D -> 4D
- view: 4D -> <4D
Additionally, a 4D->4D view operation acting on the channel dimension currently needs to be performed in NCHW format, leading to one extra input and output transpose for this case.
Transposes can be avoided for shapes where there is no difference in actual memory, e.g. for
- H == W == 1
- C == 1
- 1D/2D tensors
"""
for node in graph_module.graph.nodes:
if node.op != "call_function":
continue

if node.target == exir_ops.edge.aten.squeeze_copy.dims:
input_node = node.args[0]
input_shape = input_node.meta["val"].shape
if transpose_condition(input_shape):
self.insert_input_transpose(node, input_node, graph_module)
output_shape = node.meta["val"].shape

self._insert_squeeze_transpose(
input_shape, output_shape, node, input_node, graph_module
)

elif node.target == exir_ops.edge.aten.unsqueeze_copy.default:
input_node = get_node_arg(node.args, 0, default_value=False)
if input_node:
input_shape = input_node.meta["val"].shape
else:
input_shape = ()
output_shape = node.meta["val"].shape
if transpose_condition(output_shape):
self.insert_output_transpose(node, graph_module)

self._insert_unsqueeze_transpose(
input_shape, output_shape, node, graph_module
)

elif node.target == exir_ops.edge.aten.view_copy.default:
input_node = node.args[0]
input_shape = input_node.meta["val"].shape
output_shape = node.meta["val"].shape

old_shape = input_node.meta["val"].shape
new_shape = node.meta["val"].shape

if transpose_condition(old_shape):
self.insert_input_transpose(node, input_node, graph_module)

if transpose_condition(new_shape):
self.insert_output_transpose(node, graph_module)
self._insert_view_transpose(
input_shape, output_shape, node, input_node, graph_module
)

def call(self, graph_module: torch.fx.GraphModule):
for node in graph_module.graph.nodes:
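For illustration, the docstring above describes when this pass wraps rank-changing reshapes in TOSA transposes. A minimal standalone sketch of that condition, assuming shapes are given as plain Python tuples and mirroring (but separate from) the new memory_format_differs and is_channel_reshape static methods:

# Standalone sketch; mirrors the shape logic of the pass above, not the pass itself.
def memory_format_differs(shape):
    # 1D/2D tensors have the same memory layout in NCHW and NHWC.
    if len(shape) <= 2:
        return False
    if len(shape) >= 4:
        C, H, W = shape[1], shape[2], shape[3]
    else:  # 3D shapes are treated as CHW
        C, H, W = shape[0], shape[1], shape[2]
    return C > 1 and (H > 1 or W > 1)

def is_channel_reshape(input_shape, output_shape):
    # Only a 4D -> 4D reshape can move data across the N or C dimension.
    if not (len(input_shape) == len(output_shape) == 4):
        return False
    return (input_shape[0] != output_shape[0]) or (input_shape[1] != output_shape[1])

print(memory_format_differs((2, 3, 4, 5)))             # True: C > 1 and H, W > 1, so a transpose is needed
print(memory_format_differs((1, 1, 50, 1)))            # False: C == 1, NCHW and NHWC layouts coincide
print(is_channel_reshape((2, 3, 2, 3), (2, 3, 3, 2)))  # False: N and C are unchanged

The (2, 3, 2, 3) -> (2, 3, 3, 2) case corresponds to the new shape pair added to test_view.py later in this commit.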
8 changes: 4 additions & 4 deletions backends/arm/test/ops/test_layer_norm.py
@@ -157,9 +157,9 @@ def test_layer_norm_tosa_BI(

# Numerical issues on FVP likely due to mul op, MLETORCH-521
# Skip tests that require transposes.
@parameterized.expand(test_data_suite[:-2])
@parameterized.expand(test_data_suite)
@unittest.expectedFailure
def test_layer_norm_u55_BI(
def test_layer_norm_u55_BI_xfails(
self,
test_name: str,
test_data: torch.Tensor,
@@ -171,7 +171,8 @@ def test_layer_norm_u55_BI(

# Numerical issues on FVP likely due to mul op, MLETORCH-521
@parameterized.expand(test_data_suite[:-2])
def test_layer_norm_u85_BI_fvp(
@unittest.expectedFailure
def test_layer_norm_u85_BI_xfails(
self,
test_name: str,
test_data: torch.Tensor,
@@ -182,7 +183,6 @@ def test_layer_norm_u85_BI_fvp(
)

@parameterized.expand(test_data_suite[-2:])
@unittest.skip # Flaky
def test_layer_norm_u85_BI(
self,
test_name: str,
1 change: 0 additions & 1 deletion backends/arm/test/ops/test_to_copy.py
@@ -56,7 +56,6 @@ def _test_to_copy_tosa_MI_pipeline(
)
.export()
.dump_artifact()
.check_count({"torch.ops.aten._to_copy.default": 1})
.to_edge()
.dump_artifact()
.partition()
1 change: 1 addition & 0 deletions backends/arm/test/ops/test_view.py
@@ -43,6 +43,7 @@ class View(torch.nn.Module):
(torch.rand(1, 1, 5, 10), (1, 1, 50, 1)),
(torch.rand(5, 10, 1, 1), (1, 25, 2)),
(torch.rand(2, 50, 1, 1), (1, 100)),
(torch.rand(2, 3, 2, 3), (2, 3, 3, 2)),
]

def forward(self, x: torch.Tensor, new_shape):
(Diffs for the remaining changed files are not shown.)