import logging
 
import torch
- from executorch.backends.arm.arm_backend import generate_ethosu_compile_spec
 
+ from executorch.backends.arm.arm_backend import generate_ethosu_compile_spec
from executorch.backends.arm.arm_partitioner import ArmPartitioner
from executorch.exir import EdgeCompileConfig, ExecutorchBackendConfig
 
+ from ..models import MODEL_NAME_TO_MODEL
+ from ..models.model_factory import EagerModelFactory
from ..portable.utils import export_to_edge, save_pte_program
 
FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
- logging.basicConfig(level=logging.INFO, format=FORMAT)
-
- # TODO: When we have a more reliable quantization flow through to
- # Vela, and use the models in their original form with a
- # quantization step in our example. This will take the models
- # from examples/models/ and quantize then export to delegate.
+ logging.basicConfig(level=logging.WARNING, format=FORMAT)
+
+ # Quantize model if required using the standard export quantization flow.
+ # For now we're using the xnnpack quantizer as this produces reasonable
+ # output for our arithmetic behaviour.
+ from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
+ from torch.ao.quantization.quantizer.xnnpack_quantizer import (
+     get_symmetric_quantization_config,
+     XNNPACKQuantizer,
+ )
+
+
+ def quantize(model, example_inputs):
+     """This is the official recommended flow for quantization in pytorch 2.0 export"""
+     logging.info("Quantizing Model...")
+     logging.debug(f"Original model: {model}")
+     quantizer = XNNPACKQuantizer()
+     # if we set is_per_channel to True, we also need to add out_variant of quantize_per_channel/dequantize_per_channel
+     operator_config = get_symmetric_quantization_config(is_per_channel=False)
+     quantizer.set_global(operator_config)
+     m = prepare_pt2e(model, quantizer)
+     # calibration
+     m(*example_inputs)
+     m = convert_pt2e(m)
+     logging.debug(f"Quantized model: {m}")
+     # make sure we can export to flat buffer
+     return m
 
# Two simple models
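For context, the `quantize()` helper above follows the standard PT2E (PyTorch 2 export) post-training quantization sequence: capture the graph, attach a quantizer, insert observers with `prepare_pt2e`, run representative data through the prepared model, then fold the observers into quantize/dequantize ops with `convert_pt2e`. Below is a minimal, self-contained sketch of that same sequence on a toy module; `TinyModel` and its input shape are made up for illustration, while the quantizer configuration mirrors the helper above.

```python
# Hypothetical toy module; only the PT2E calls mirror the quantize() helper above.
import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)


class TinyModel(torch.nn.Module):
    def forward(self, x):
        return torch.nn.functional.softmax(x, dim=-1)


example_inputs = (torch.randn(1, 8),)

# Pre-autograd capture, as done later in this script before quantize() is called.
captured = torch._export.capture_pre_autograd_graph(TinyModel(), example_inputs)

quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config(is_per_channel=False))

prepared = prepare_pt2e(captured, quantizer)  # insert observers
prepared(*example_inputs)                     # calibrate on representative inputs
quantized = convert_pt2e(prepared)            # rewrite to quantize/dequantize ops
```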
@@ -93,7 +116,7 @@ def forward(self, x):
    "-m",
    "--model_name",
    required=True,
-     help=f"Provide model name. Valid ones: {list(models.keys())}",
+     help=f"Provide model name. Valid ones: {set(list(models.keys()) + list(MODEL_NAME_TO_MODEL.keys()))}",
)
parser.add_argument(
    "-d",
@@ -103,10 +126,22 @@ def forward(self, x):
    default=False,
    help="Flag for producing ArmBackend delegated model",
)
+ parser.add_argument(
+     "-q",
+     "--quantize",
+     action="store_true",
+     required=False,
+     default=False,
+     help="Produce a quantized model",
+ )
 
args = parser.parse_args()
 
- if args.model_name not in models.keys():
+ # support models defined within this file or examples/models/ lists
+ if (
+     args.model_name not in models.keys()
+     and args.model_name not in MODEL_NAME_TO_MODEL.keys()
+ ):
    raise RuntimeError(f"Model {args.model_name} is not a valid name.")
 
if (
@@ -116,28 +151,47 @@ def forward(self, x):
):
    raise RuntimeError(f"Model {args.model_name} cannot be delegated.")
 
- model = models[args.model_name]()
- example_inputs = models[args.model_name].example_input
+ # 1. pick model from one of the supported lists
+ model = None
+ example_inputs = None
+
+ # 1.a. models in this file
+ if args.model_name in models.keys():
+     model = models[args.model_name]()
+     example_inputs = models[args.model_name].example_input
+ # 1.b. models in examples/models/
+ # IFF the model is not in our local models
+ elif args.model_name in MODEL_NAME_TO_MODEL.keys():
+     logging.warning(
+         "Using a model from examples/models; not all of these are currently supported"
+     )
+     model, example_inputs, _ = EagerModelFactory.create_model(
+         *MODEL_NAME_TO_MODEL[args.model_name]
+     )
 
model = model.eval()
 
# pre-autograd export. eventually this will become torch.export
model = torch._export.capture_pre_autograd_graph(model, example_inputs)
 
+ # Quantize if required
+ if args.quantize:
+     model = quantize(model, example_inputs)
+
edge = export_to_edge(
    model,
    example_inputs,
    edge_compile_config=EdgeCompileConfig(
        _check_ir_validity=False,
    ),
)
- logging.info(f"Exported graph:\n{edge.exported_program().graph}")
+ logging.debug(f"Exported graph:\n{edge.exported_program().graph}")
 
if args.delegate is True:
    edge = edge.to_backend(
        ArmPartitioner(generate_ethosu_compile_spec("ethos-u55-128"))
    )
-     logging.info(f"Lowered graph:\n{edge.exported_program().graph}")
+     logging.debug(f"Lowered graph:\n{edge.exported_program().graph}")
 
exec_prog = edge.to_executorch(
    config=ExecutorchBackendConfig(extract_constant_segment=False)
@@ -146,4 +200,4 @@ def forward(self, x):
model_name = f"{args.model_name}" + (
    "_arm_delegate" if args.delegate is True else ""
)
- save_pte_program(exec_prog.buffer, model_name)
+ save_pte_program(exec_prog, model_name)
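Taken together, the flow this file implements after the change is: select or build the model, capture it pre-autograd, optionally quantize it, export to the edge dialect, optionally delegate supported subgraphs to the Arm backend, and serialize the program. A condensed sketch of that path is below; it assumes `model` and `example_inputs` already exist (argument parsing and model selection elided) and reuses the calls that appear in the script, with the relative import assuming the same examples package layout.

```python
# Condensed sketch of the delegated export path in this script.
# Assumes `model` and `example_inputs` come from the selection logic above;
# the relative import assumes this code lives in the same examples package.
import torch
from executorch.backends.arm.arm_backend import generate_ethosu_compile_spec
from executorch.backends.arm.arm_partitioner import ArmPartitioner
from executorch.exir import EdgeCompileConfig, ExecutorchBackendConfig

from ..portable.utils import export_to_edge, save_pte_program

model = model.eval()
model = torch._export.capture_pre_autograd_graph(model, example_inputs)
# model = quantize(model, example_inputs)  # optional, when -q/--quantize is given

edge = export_to_edge(
    model,
    example_inputs,
    edge_compile_config=EdgeCompileConfig(_check_ir_validity=False),
)
# Partition and lower supported subgraphs to the Ethos-U55 backend.
edge = edge.to_backend(ArmPartitioner(generate_ethosu_compile_spec("ethos-u55-128")))

exec_prog = edge.to_executorch(
    config=ExecutorchBackendConfig(extract_constant_segment=False)
)
save_pte_program(exec_prog, "model_arm_delegate")  # serializes the program to a .pte file
```

With the new flag in place, driving the script with `-m <model_name> -d -q` (script invocation and model names assumed here) should then produce a quantized, Arm-delegated program file.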