Skip to content

Commit 990a0bd

Browse files
committed
Update on "[ExecuTorch][Weight Sharing][XNNPACK] Serialize constant tensors into named data map"
We serialize tensors into the named data map and return the output in the preprocess result, allowing XNNPACK to share tensors that have the same name instead of duplicating them. A key change here involves fused tensors: for BN-and-Convolution fusion, we fuse the conv weights and bias with the BN parameters, creating new tensors, and then create get_attr nodes for these new parameters. Because of the fx graph interpreter in the export pass base, the new names we create for these tensors are lost on each pass. As a result, we introduce a new pass at the end to preserve the names we created. This seems a little hacky for now, but it is the only way to preserve the new fused names. Differential Revision: [D70315207](https://our.internmc.facebook.com/intern/diff/D70315207/) **NOTE FOR REVIEWERS**: This PR has internal Meta-specific changes or comments; please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D70315207/)! [ghstack-poisoned]
2 parents ffdf36f + 930e140 commit 990a0bd

File tree

121 files changed

+2670
-960
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

121 files changed

+2670
-960
lines changed

.ci/scripts/unittest-buck2.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@ buck2 query "//backends/apple/... + //backends/example/... + \
1717
//kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \
1818
//kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..."
1919

20+
UNBUILDABLE_OPTIMIZED_OPS_REGEX="gelu|fft_r2c|log_softmax"
21+
BUILDABLE_OPTIMIZED_OPS=$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX)
2022
# TODO: expand the covered scope of Buck targets.
2123
# //runtime/kernel/... is failing because //third-party:torchgen_files's shell script can't find python on PATH.
2224
# //runtime/test/... requires Python torch, which we don't have in our OSS buck setup.
23-
buck2 build //runtime/backend/... //runtime/core/... //runtime/executor: //runtime/kernel/... //runtime/platform/...
24-
buck2 test //runtime/backend/... //runtime/core/... //runtime/executor: //runtime/kernel/... //runtime/platform/...
25+
buck2 test $BUILDABLE_OPTIMIZED_OPS //kernels/portable/... //runtime/backend/... //runtime/core/... \
26+
//runtime/executor: //runtime/kernel/... //runtime/platform/...

.lintrunner.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,8 @@ exclude_patterns = [
218218
'examples/**',
219219
'extension/**',
220220
'kernels/optimized/**',
221+
# Justified <functional> include.
222+
'runtime/kernel/thread_parallel_interface.h',
221223
'scripts/**',
222224
'third-party/**',
223225
'util/**',

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -751,7 +751,6 @@ if(EXECUTORCH_BUILD_PTHREADPOOL
751751
AND EXECUTORCH_BUILD_CPUINFO
752752
)
753753
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
754-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/parallel)
755754
endif()
756755

757756
if(EXECUTORCH_BUILD_PYBIND)

CODEOWNERS

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -52,31 +52,31 @@
5252
/extension/export_util @kimishpatel
5353
/extension/flat_tensor @lucylq
5454
/extension/gguf_util @larryliu0820
55-
/extension/kernel_util @kimishpatel @manuelcandales
56-
/extension/llm @jackzhxng @iseeyuan @larryliu0820
57-
/extension/memory_allocator @JacobSzwejbka
55+
/extension/kernel_util @kimishpatel @manuelcandales @swolchok
56+
/extension/llm @jackzhxng @iseeyuan @larryliu0820 @swolchok
57+
/extension/memory_allocator @JacobSzwejbka @swolchok
5858
/extension/module @shoumikhin
59-
/extension/parallel @kimishpatel
59+
/extension/parallel @kimishpatel @swolchok
6060
/extension/pybindings @JacobSzwejbka @larryliu0820
61-
/extension/pytree @JacobSzwejbka
62-
# /extension/runner_util @dbort
61+
/extension/pytree @JacobSzwejbka @swolchok
62+
/extension/runner_util @swolchok
6363
/extension/tensor @shoumikhin
64-
# /extension/testing_util @dbort
65-
/extension/threadpool @kimishpatel
64+
/extension/testing_util @swolchok
65+
/extension/threadpool @kimishpatel @swolchok
6666
/extension/training @JacobSzwejbka
6767

68-
/kernels @manuelcandales
68+
/kernels @manuelcandales @swolchok
6969

7070
/profiler @tarun292 @Gasoonjia
7171

72-
/runtime @JacobSzwejbka @lucylq
72+
/runtime @JacobSzwejbka @lucylq @swolchok
7373
/runtime/backend @cccclai
7474

7575
/schema @JacobSzwejbka @lucylq
7676

77-
/scripts @GregoryComer
77+
/scripts @GregoryComer @swolchok
7878

79-
/shim @larryliu0820 @GregoryComer
79+
/shim @larryliu0820 @GregoryComer @swolchok
8080

8181
/third-party @GregoryComer
8282

Test.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ if(BUILD_TESTING)
1313
add_subdirectory(extension/evalue_util/test)
1414
add_subdirectory(extension/kernel_util/test)
1515
add_subdirectory(extension/memory_allocator/test)
16-
add_subdirectory(extension/parallel/test)
1716
add_subdirectory(extension/pytree/test)
1817
add_subdirectory(kernels/portable/cpu/util/test)
1918
add_subdirectory(kernels/prim_ops/test)

backends/arm/test/conftest.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@ def pytest_configure(config):
3232
pytest._test_options = {} # type: ignore[attr-defined]
3333
pytest._test_options["corstone_fvp"] = False # type: ignore[attr-defined]
3434

35-
if config.option.arm_run_corstoneFVP:
35+
if (
36+
getattr(config.option, "arm_run_corstoneFVP", False)
37+
and config.option.arm_run_corstoneFVP
38+
):
3639
corstone300_exists = shutil.which("FVP_Corstone_SSE-300_Ethos-U55")
3740
corstone320_exists = shutil.which("FVP_Corstone_SSE-320")
3841
if not (corstone300_exists and corstone320_exists):

backends/cadence/aot/TARGETS

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,23 @@ python_library(
115115
],
116116
deps = [
117117
"fbcode//caffe2:torch",
118-
"fbcode//executorch/exir:scalar_type",
119118
"fbcode//executorch/backends/cadence/aot:utils",
120119
],
121120
)
122121

122+
python_library(
123+
name = "ref_implementations",
124+
srcs = [
125+
"ref_implementations.py",
126+
],
127+
typing = True,
128+
deps = [
129+
"fbcode//caffe2:torch",
130+
"fbcode//executorch/exir:scalar_type",
131+
],
132+
)
133+
134+
123135
export_file(name = "functions.yaml")
124136

125137
executorch_generated_lib(

backends/cadence/aot/compiler.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,8 @@ def export_to_edge(
198198
_skip_dim_order=True,
199199
# Allow specific non-core aten ops in the IR.
200200
_core_aten_ops_exception_list=[
201+
torch.ops.aten._linalg_det.default,
202+
torch.ops.aten._linalg_svd.default,
201203
torch.ops.aten._native_batch_norm_legit_functional.default,
202204
torch.ops.aten.linear.default,
203205
torch.ops.aten.linalg_vector_norm.default,

backends/cadence/aot/export_example.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ def export_model(
3838
example_inputs: Tuple[Any, ...],
3939
file_name: str = "CadenceDemoModel",
4040
run_and_compare: bool = True,
41+
eps_error: float = 1e-1,
42+
eps_warn: float = 1e-5,
4143
):
4244
# create work directory for outputs and model binary
4345
working_dir = tempfile.mkdtemp(dir="/tmp")
@@ -89,4 +91,6 @@ def export_model(
8991
inputs=example_inputs,
9092
ref_outputs=ref_outputs,
9193
working_dir=working_dir,
94+
eps_error=eps_error,
95+
eps_warn=eps_warn,
9296
)

backends/cadence/aot/functions.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,3 +248,8 @@
248248
kernels:
249249
- arg_meta: null
250250
kernel_name: impl::reference::quantized_fully_connected_per_tensor_out
251+
252+
- func: cadence::requantize.out(Tensor input, Tensor in_scale, Tensor in_zero_point, Tensor out_scale, Tensor out_zero_point, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
253+
kernels:
254+
- arg_meta: null
255+
kernel_name: impl::reference::requantize_out

0 commit comments

Comments (0)