
Commit 55aeae8

Merge branch 'main' of https://github.com/pytorch/executorch into change-1078286
2 parents b1248b6 + 4197fc1

File tree: 332 files changed, +10376 -9559 lines changed


.github/workflows/build-presets.yml

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        preset: [macos, ios, ios-simulator, pybind, llm]
+        preset: [macos, ios, ios-simulator, pybind, profiling, llm]
     with:
       job-name: build
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

CMakeLists.txt

Lines changed: 24 additions & 0 deletions
@@ -278,6 +278,30 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
   )
 endif()
 
+if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
+  set(TORCHAO_BUILD_ATEN_OPS OFF)
+  set(TORCHAO_BUILD_EXECUTORCH_OPS ON)
+  set(TORCHAO_BUILD_CPU_AARCH64 ON)
+  set(TORCHAO_ENABLE_ARM_NEON_DOT ON)
+
+  list(APPEND TORCHAO_INCLUDE_DIRS
+    ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
+    ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
+    ${EXECUTORCH_ROOT}/third-party/ao
+  )
+
+  set(EXECUTORCH_INCLUDE_DIRS ${TORCHAO_INCLUDE_DIRS})
+
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental)
+  executorch_target_link_options_shared_lib(torchao_ops_executorch)
+  list(APPEND _executorch_kernels torchao_ops_executorch)
+endif()
+
+if(EXECUTORCH_BUILD_TESTS)
+  set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
+  include(CTest)
+endif()
+
 # TODO(dbort): Fix these warnings and remove this flag.
 set(_common_compile_options -Wno-deprecated-declarations -fPIC)

CMakePresets.json

Lines changed: 20 additions & 0 deletions
@@ -100,6 +100,26 @@
         "list": ["Darwin", "Linux", "Windows"]
       }
     },
+    {
+      "name": "profiling",
+      "displayName": "Build ExecuTorch with Profiling Enabled",
+      "inherits": [
+        "common"
+      ],
+      "cacheVariables": {
+        "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/profiling.cmake",
+        "CMAKE_OSX_DEPLOYMENT_TARGET": "12.0"
+      },
+      "condition": {
+        "type": "inList",
+        "string": "${hostSystemName}",
+        "list": [
+          "Darwin",
+          "Linux",
+          "Windows"
+        ]
+      }
+    },
     {
       "name": "zephyr",
       "displayName": "Build ExecuTorch for Zephyr RTOS",

backends/apple/coreml/TARGETS

Lines changed: 2 additions & 0 deletions
@@ -17,6 +17,7 @@ runtime.python_library(
     name = "backend",
     srcs = glob([
         "compiler/*.py",
+        "logging.py",
     ]),
     visibility = [
         "@EXECUTORCH_CLIENTS",
@@ -33,6 +34,7 @@ runtime.python_library(
     name = "partitioner",
     srcs = glob([
         "partition/*.py",
+        "logging.py",
     ]),
     visibility = [
         "@EXECUTORCH_CLIENTS",

backends/apple/coreml/compiler/coreml_preprocess.py

Lines changed: 5 additions & 4 deletions
@@ -16,20 +16,20 @@
 
 import coremltools as ct
 import coremltools.optimize as cto
-
 from executorch.backends.apple.coreml import executorchcoreml
+from executorch.backends.apple.coreml.logging import get_coreml_log_level
 from executorch.exir.backend.backend_details import (
     BackendDetails,
     ExportedProgram,
     PreprocessResult,
 )
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.WARNING)
-
 from executorch.backends.apple.coreml.compiler.torch_ops import *  # noqa: F401, F403
 
+logger = logging.getLogger(__name__)
+logger.setLevel(get_coreml_log_level(default_level=logging.WARNING))
+
 
 class COMPILE_SPEC_KEYS(Enum):
     COMPUTE_UNITS = "compute_units"
@@ -409,6 +409,7 @@ def preprocess(
         edge_program: ExportedProgram,
         compile_specs: List[CompileSpec],
     ) -> PreprocessResult:
+        logger.info(f"Edge program: {edge_program}")
         model_type: CoreMLBackend.MODEL_TYPE = (
             CoreMLBackend.model_type_from_compile_specs(
                 compile_specs,

backends/apple/coreml/compiler/torch_ops.py

Lines changed: 25 additions & 2 deletions
@@ -9,13 +9,15 @@
 # the op to the coremltools library.
 
 import torch as _torch
-from coremltools import _logger as logger
+from coremltools import _logger
 from coremltools.converters.mil.frontend import _utils
 from coremltools.converters.mil.frontend.torch.ops import (
     _get_inputs,
+    _get_kwinputs,
     NUM_TO_NUMPY_DTYPE,
     NUM_TO_TORCH_DTYPE,
     split,
+    to,
     transpose,
     unbind,
 )
@@ -24,6 +26,7 @@
     register_torch_op,
 )
 from coremltools.converters.mil.mil import types
+from executorch.exir.dim_order_utils import get_memory_format
 
 
 # https://github.com/apple/coremltools/pull/2556
@@ -44,6 +47,26 @@ def split_copy(context, node):
     split(context, node)
 
 
+@register_torch_op(
+    torch_alias=[
+        "dim_order_ops::_to_dim_order_copy",
+        "dim_order_ops._to_dim_order_copy",
+    ],
+    override=False,
+)
+def _to_dim_order_copy(context, node):
+    dim_order = _get_kwinputs(context, node, "dim_order", default=[None])[0]
+    node.kwinputs.pop("dim_order")
+
+    # In CoreML, dim_order.val will be an ndarray, so we convert it to a list
+    dim_order = [int(d) for d in dim_order.val]
+    memory_format = get_memory_format(dim_order)
+    assert (
+        memory_format == _torch.contiguous_format
+    ), "Only contiguous memory format is supported in CoreML"
+    to(context, node)
+
+
 # https://github.com/apple/coremltools/pull/2558
 @register_torch_op(
     torch_alias=["torchao::dequantize_affine", "torchao.dequantize_affine"],
@@ -88,7 +111,7 @@ def dequantize_affine(context, node):
     out_np_dtype = None
     if len(inputs) > 7:
         out_np_dtype = NUM_TO_NUMPY_DTYPE[inputs[7].val]
-        logger.warning(
+        _logger.warning(
             f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_affine and instead uses the native precision."
         )

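Note: the new _to_dim_order_copy handler maps the incoming dim_order to a torch memory format via get_memory_format and asserts that only contiguous layouts reach Core ML. Below is a minimal sketch of that check in isolation; the example dim orders are assumptions for illustration, not part of the commit.

# Minimal sketch of the dim_order -> memory format check used by the handler.
import torch
from executorch.exir.dim_order_utils import get_memory_format

contiguous_dim_order = [0, 1, 2, 3]     # assumed to map to torch.contiguous_format
channels_last_dim_order = [0, 2, 3, 1]  # assumed to map to torch.channels_last

# The handler only accepts dim orders that correspond to contiguous tensors.
assert get_memory_format(contiguous_dim_order) == torch.contiguous_format
assert get_memory_format(channels_last_dim_order) != torch.contiguous_format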
backends/apple/coreml/logging.py

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+# Copyright © 2023 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+import logging
+import os
+from typing import Optional
+
+
+def get_coreml_log_level(default_level: int) -> Optional[str]:
+    level_str = os.environ.get("ET_COREML_LOG_LEVEL", "").upper()
+    if level_str == "":
+        return default_level
+
+    level_map = {
+        "DEBUG": logging.DEBUG,
+        "INFO": logging.INFO,
+        "WARNING": logging.WARNING,
+        "ERROR": logging.ERROR,
+        "CRITICAL": logging.CRITICAL,
+    }
+    if level_str not in level_map:
+        raise ValueError(f"Invalid ET_COREML_LOG_LEVEL: {level_str}")
+    return level_map[level_str]

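The new get_coreml_log_level helper reads the ET_COREML_LOG_LEVEL environment variable and falls back to the caller-supplied default when it is unset. A minimal sketch of how the commit wires it into the Core ML backend loggers; the DEBUG value below is just an example.

import logging
import os

from executorch.backends.apple.coreml.logging import get_coreml_log_level

# Opt in to more verbose Core ML backend logging. Accepted values are
# DEBUG/INFO/WARNING/ERROR/CRITICAL; anything else raises ValueError.
os.environ["ET_COREML_LOG_LEVEL"] = "DEBUG"

logger = logging.getLogger(__name__)
logger.setLevel(get_coreml_log_level(default_level=logging.WARNING))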
backends/apple/coreml/partition/coreml_partitioner.py

Lines changed: 83 additions & 28 deletions
@@ -10,6 +10,8 @@
 import torch
 
 from executorch.backends.apple.coreml.compiler import CoreMLBackend
+
+from executorch.backends.apple.coreml.logging import get_coreml_log_level
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 
 from executorch.exir.backend.partitioner import (
@@ -18,12 +20,13 @@
     PartitionResult,
 )
 from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
+from executorch.exir.dialects._ops import ops as exir_ops
 from torch.export.exported_program import ExportedProgram
 from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner
 from torch.fx.passes.operator_support import OperatorSupportBase
 
 logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
+logger.setLevel(get_coreml_log_level(default_level=logging.INFO))
 
 
 def _is_view_op(op: torch._ops.OpOverload) -> bool:
@@ -54,6 +57,80 @@ def log_once(self, msg: str) -> None:
             logger.info(msg)
         self._logged_msgs.add(msg)
 
+    def should_skip_op_for_delegation(self, node_target_name: str) -> bool:
+        skipped_ops = self.skip_ops_for_coreml_delegation or []
+        if node_target_name in skipped_ops:
+            assert (
+                not self.lower_full_graph
+            ), f"Cannot skip {node_target_name} because lower_full_graph is True. Please set skip_ops_for_coreml_delegation=None or lower_full_graph=False in the CoreMLPartitioner"
+            self.log_once(
+                "Skipping op for CoreML delegation because it is in skip_ops_for_coreml_delegation: "
+                + node_target_name
+            )
+            return True
+        return False
+
+    def should_override_support(self, node) -> bool:
+        # https://github.com/apple/coremltools/issues/2573
+        if (
+            node.target
+            in [
+                torch.ops.aten.sub.Tensor,
+                exir_ops.edge.aten.sub.Tensor,
+                torch.ops.aten.add.Tensor,
+                exir_ops.edge.aten.add.Tensor,
+            ]
+            and "alpha" in node.kwargs
+            and node.kwargs["alpha"] != 1
+        ):
+            self.log_once(
+                "torch.ops.aten.{sub, add}.Tensor with alpha != 1 is not supported by CoreML. Overriding support."
+            )
+            return True
+
+        # https://github.com/apple/coremltools/issues/2565
+        if node.target in [
+            torch.ops.aten.diagonal.default,
+            torch.ops.aten.diagonal_copy.default,
+            exir_ops.edge.aten.diagonal.default,
+            exir_ops.edge.aten.diagonal_copy.default,
+        ]:
+            self.log_once(
+                "torch.ops.aten.diagonal.default has a bug in CoreML. Overriding op support."
+            )
+            return True
+
+        # https://github.com/apple/coremltools/issues/2569
+        if node.target in [
+            torch.ops.aten.acosh.default,
+            exir_ops.edge.aten.acosh.default,
+            torch.ops.aten.asinh.default,
+            exir_ops.edge.aten.asinh.default,
+        ]:
+            self.log_once(
+                "torch.ops.aten.{acosh, asinh}.default is not supported by CoreML. Overriding op support."
+            )
+            return True
+
+        # TODO: enable this after bugs in ExecuTorch's partitioner are fixed
+        # # If lower_full_graph=False, do not partition nodes with symbolic args because it can result in symbolic args
+        # # in the placeholders due to partitioning, which CoreML does not support
+        # if not self.lower_full_graph and any(
+        #     isinstance(arg, torch.fx.Node)
+        #     and isinstance(
+        #         arg.meta.get("val", None),
+        #         (torch.SymInt, torch.SymBool, torch.SymFloat),
+        #     )
+        #     for arg in node.args
+        # ):
+        #     self.log_once(
+        #         "Skipping op for CoreML delegation because it contains symbolic args: "
+        #         + node_target_name
+        #     )
+        #     return True
+
+        return False
+
     def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
         # get_attr node can always be supported on any backend
         if node.op == "get_attr":
@@ -62,38 +139,17 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
         elif node.op == "call_function":
             # skip ops if specified by user
             node_target_name = getattr(node.target, "__name__", "").lower()
-            if node_target_name in (self.skip_ops_for_coreml_delegation or []):
-                self.log_once(
-                    "Skipping op for CoreML delegation because it is in skip_ops_for_coreml_delegation: "
-                    + node_target_name
-                )
-                assert (
-                    not self.lower_full_graph
-                ), "Cannot have skip_ops_for_coreml_delegation when lower_full_graph is True"
-                return False
 
-            # TODO: enable this after bugs in ExecuTorch's partitioner are fixed
-            # # If lower_full_graph=False, do not partition nodes with symbolic args because it can result in symbolic args
-            # # in the placeholders due to partitioning, which CoreML does not support
-            # if not self.lower_full_graph and any(
-            #     isinstance(arg, torch.fx.Node)
-            #     and isinstance(
-            #         arg.meta.get("val", None),
-            #         (torch.SymInt, torch.SymBool, torch.SymFloat),
-            #     )
-            #     for arg in node.args
-            # ):
-            #     self.log_once(
-            #         "Skipping op for CoreML delegation because it contains symbolic args: "
-            #         + node_target_name
-            #     )
-            #     assert not self.lower_full_graph
-            #     return False
+            if self.should_skip_op_for_delegation(node_target_name):
+                return False
 
             # query coremltools to see if node is supported
             is_supported = ct.converters.mil.frontend.torch.is_torch_fx_node_supported(
                 node
             )
+            if self.should_override_support(node):
+                is_supported = False
+
             if not is_supported:
                 if self.lower_full_graph:
                     raise NotImplementedError(
@@ -124,7 +180,6 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
 
 
 class CoreMLPartitioner(Partitioner):
-
     def __init__(
         self,
         *,

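The partitioner change folds the user skip-list and the coremltools support overrides into should_skip_op_for_delegation and should_override_support, and the skip-list check now asserts against lower_full_graph=True with a message naming the offending op. A minimal usage sketch, under the assumption that both settings are keyword-only constructor options of CoreMLPartitioner as the assertion message suggests; the op name below is hypothetical.

from executorch.backends.apple.coreml.partition.coreml_partitioner import (
    CoreMLPartitioner,
)

# Skipping specific ops is allowed when partial delegation is acceptable.
partitioner = CoreMLPartitioner(
    skip_ops_for_coreml_delegation=["aten.add.tensor"],  # hypothetical op name
    lower_full_graph=False,
)

# Combining a skip list with lower_full_graph=True now trips the assertion in
# should_skip_op_for_delegation as soon as a skipped op is encountered.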
backends/apple/coreml/runtime/delegate/multiarray.mm

Lines changed: 3 additions & 0 deletions
@@ -123,6 +123,9 @@ bool init_bnns_descriptor(BNNSNDArrayDescriptor& bnns_descriptor, const MultiArr
 }
 
 bool copy_using_bnns(const MultiArray& src, MultiArray& dst) {
+    if (src.layout().dataType() != dst.layout().dataType()) {
+        return false;
+    }
     if (dst.layout().num_bytes() < src.layout().num_bytes()) {
         return false;
     }

backends/arm/README.md

Lines changed: 2 additions & 2 deletions
@@ -181,8 +181,8 @@ The Arm EthosU Backend should be considered a prototype quality at this point, l
 ## Current flows
 
 The EthosUBackend has a two stage process,
-- Compile to TOSA to rationalise the graph into known hardware support profiles. Currently this is to v0.80 TOSA BI with specific concern to a subset which gives support on Ethos-U55 and Ethos-U85, the target of the initial prototype efforts. This calls into the TOSABackend.
-- Lower via the ethos-u-vela compilation flow which takes TOSA v0.80 as an input and produces a low level commandstream for the hardware which is then passed via the delegate to the ethos-u-core-driver for direct execution.
+- Compile to TOSA to rationalise the graph into known hardware support profiles. Currently this is to v1.0 TOSA INT with specific concern to a subset which gives support on Ethos-U55 and Ethos-U85, the target of the initial prototype efforts. This calls into the TOSABackend.
+- Lower via the ethos-u-vela compilation flow which takes TOSA v1.0 as an input and produces a low level commandstream for the hardware which is then passed via the delegate to the ethos-u-core-driver for direct execution.
 
 The EthosUPartitioner is currenly used to ensure the operations converted are Ethos-U compatible, but will be extended to offer spec-correct TOSA Base inference and TOSA Main Inference generation in future.
