From 7fa598cd3dadbbccab6cf2cc891932288e328c02 Mon Sep 17 00:00:00 2001 From: gs-olive <113141689+gs-olive@users.noreply.github.com> Date: Wed, 8 Mar 2023 17:41:13 -0800 Subject: [PATCH] infra: Add Torch 1.13.1 testing to nightly CI - Add testing for Torch 1.13.1 path in CI across both TS and FX compilation paths - Disable `aten` tests for 1.13.1, to resolve Torch Dynamo import/functionality issues - Refactor quantization FX tests to resolve key error in pattern dictionary - Add parameter fields to CI to accomodate Torch 1.13.1 version - Update `dispatch_tracer` function docstrings and imports to avoid naming issue with `torch._dynamo` vs `torchdynamo` - Rename CI versioning to use "legacy" --- .circleci/config.yml | 150 +++++++++++++++++- py/setup.py | 2 +- .../fx/test/quant/test_quant_trt.py | 25 ++- .../fx/tracer/dispatch_tracer/aten_tracer.py | 13 +- 4 files changed, 175 insertions(+), 15 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f2675c20dc..3a1e376405 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -524,15 +524,47 @@ commands: - store_artifacts: path: /tmp/testlogs - test-fx_converters: - description: "Test the fx converters" + test-fx_converters_acc: + description: "Test the fx acc converters" steps: - run: name: Run FX converter tests command: | cd py/torch_tensorrt/fx/test - pushd converters/ - pytest --junitxml=/tmp/artifacts/test_results/fx/converters/test_results.xml + pushd converters/acc_op/ + pytest --junitxml=/tmp/artifacts/test_results/fx/converters/acc_op/test_results.xml + popd + + - store_test_results: + path: /tmp/artifacts + - store_artifacts: + path: /tmp/testlogs + + test-fx_converters_aten: + description: "Test the fx aten converters" + steps: + - run: + name: Run FX converter tests + command: | + cd py/torch_tensorrt/fx/test + pushd converters/aten_op/ + pytest --junitxml=/tmp/artifacts/test_results/fx/converters/aten_op/test_results.xml + popd + + - store_test_results: + path: /tmp/artifacts + - store_artifacts: + path: /tmp/testlogs + + test-fx_converters_vanilla: + description: "Test the fx vanilla converters" + steps: + - run: + name: Run FX converter tests + command: | + cd py/torch_tensorrt/fx/test + pushd converters/vanilla/ + pytest --junitxml=/tmp/artifacts/test_results/fx/converters/vanilla/test_results.xml popd - store_test_results: @@ -587,7 +619,7 @@ commands: path: /tmp/testlogs test-fx_tracer: - description: "Test the fx tracer" + description: "Test all fx tracers" steps: - run: name: Run FX tracer @@ -602,6 +634,22 @@ commands: - store_artifacts: path: /tmp/testlogs + test-fx_tracer_acc: + description: "Test the fx acc tracer only" + steps: + - run: + name: Run FX tracer + command: | + cd py/torch_tensorrt/fx/test + pushd tracer + list_tracer=$(ls | grep test_acc) + pytest $list_tracer --junitxml=/tmp/artifacts/test_results/fx/tracer/test_results.xml + popd + - store_test_results: + path: /tmp/artifacts + - store_artifacts: + path: /tmp/testlogs + test-fx_quant: description: "Test the fx quant" steps: @@ -625,7 +673,9 @@ commands: name: Run fx tests command: | mkdir -p /tmp/artifacts/test_results - - test-fx_converters + - test-fx_converters_acc + - test-fx_converters_aten + - test-fx_converters_vanilla - test-fx_passes - test-fx_tools - test-fx_trt_lower @@ -637,6 +687,26 @@ commands: - store_artifacts: path: /tmp/testlogs + test-fx-no-aten: + description: "Test the fx backend without aten operators" + steps: + - run: + name: Run fx tests without aten ops + command: | + mkdir -p 
/tmp/artifacts/test_results + - test-fx_converters_acc + - test-fx_converters_vanilla + - test-fx_passes + - test-fx_tools + - test-fx_trt_lower + - test-fx_tracer_acc + - test-fx_core + - test-fx_quant + - store_test_results: + path: /tmp/artifacts + - store_artifacts: + path: /tmp/testlogs + # Define a job to be invoked later in a workflow. # See: https://circleci.com/docs/2.0/configuration-reference/#jobs jobs: @@ -782,6 +852,37 @@ jobs: - dump-test-env - test-fx + test-py-fx-x86_64-linux-no-aten: + parameters: + torch-build: + type: string + torch-build-index: + type: string + trt-version-long: + type: string + machine: + image: ubuntu-2004-cuda-11.4:202110-01 + resource_class: gpu.nvidia.large + steps: + - checkout + - attach_workspace: + at: /tmp/dist/ + - install-torch-from-index: + torch-build: << parameters.torch-build >> + torch-build-index: << parameters.torch-build-index >> + - create-py-env: + trt-version-long: << parameters.trt-version-long >> + - install-cudnn + # - run: + # name: "Set LD_LIBRARY_PATH path to include the installed CUDNN" + # command: export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH + - run: + name: "Install torch-tensorrt" + command: pip3 install --pre /tmp/dist/x86_64-linux/*cp39-cp39*.whl + # We install torch after torch-trt because pip automatically enforces the version constraint otherwise + - dump-test-env + - test-fx-no-aten + package-x86_64-linux: parameters: enabled: @@ -1074,6 +1175,12 @@ parameters: torch-build-index: type: string default: "https://download.pytorch.org/whl/nightly/cu117" + torch-build-legacy: + type: string + default: "1.13.1+cu117" + torch-build-index-legacy: + type: string + default: "https://download.pytorch.org/whl/cu117" cudnn-version: type: string default: "8.5.0.96" @@ -1127,6 +1234,7 @@ workflows: - release/**/* jobs: - build-x86_64-linux: + name: build-x86_64-linux torch-build: << pipeline.parameters.torch-build >> torch-build-index: << pipeline.parameters.torch-build-index >> @@ -1153,6 +1261,36 @@ workflows: requires: - build-x86_64-linux + - build-x86_64-linux: + name: build-x86_64-linux-legacy + torch-build: << pipeline.parameters.torch-build-legacy >> + torch-build-index: << pipeline.parameters.torch-build-index-legacy >> + + - test-core-cpp-x86_64-linux: + name: test-core-cpp-x86_64-linux-legacy + torch-build: << pipeline.parameters.torch-build-legacy >> + torch-build-index: << pipeline.parameters.torch-build-index-legacy >> + trt-version-short: << pipeline.parameters.trt-version-short >> + trt-version-long: << pipeline.parameters.trt-version-long >> + cudnn-version: << pipeline.parameters.cudnn-version >> + requires: + - build-x86_64-linux-legacy + + - test-py-ts-x86_64-linux: + name: test-py-ts-x86_64-linux-legacy + torch-build: << pipeline.parameters.torch-build-legacy >> + torch-build-index: << pipeline.parameters.torch-build-index-legacy >> + trt-version-long: << pipeline.parameters.trt-version-long >> + requires: + - build-x86_64-linux-legacy + + - test-py-fx-x86_64-linux-no-aten: + torch-build: << pipeline.parameters.torch-build-legacy >> + torch-build-index: << pipeline.parameters.torch-build-index-legacy >> + trt-version-long: << pipeline.parameters.trt-version-long >> + requires: + - build-x86_64-linux-legacy + release: when: << pipeline.parameters.enable-packaging >> jobs: diff --git a/py/setup.py b/py/setup.py index 672f508393..f7247a9f90 100644 --- a/py/setup.py +++ b/py/setup.py @@ -380,7 +380,7 @@ def run(self): long_description=long_description, ext_modules=ext_modules, 
install_requires=[ - "torch>=1.14.0.dev0", + "torch>=1.13.1", ], setup_requires=[], cmdclass={ diff --git a/py/torch_tensorrt/fx/test/quant/test_quant_trt.py b/py/torch_tensorrt/fx/test/quant/test_quant_trt.py index 5d5e747505..a78aaedf2e 100644 --- a/py/torch_tensorrt/fx/test/quant/test_quant_trt.py +++ b/py/torch_tensorrt/fx/test/quant/test_quant_trt.py @@ -696,7 +696,6 @@ def conv_add_extra_inputs_getter(pattern): return [extra_input] conv_add_config = { - "pattern_complex_format": (operator.add, torch.nn.Conv2d, MatchAllNode), "observation_type": ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT, "dtype_configs": [ weighted_op_qint8_dtype_config, @@ -707,6 +706,15 @@ def conv_add_extra_inputs_getter(pattern): "reference_quantized_module_for_root": torch.nn.quantized._reference.Conv2d, } + if torch.__version__.startswith("1"): + conv_add_config["pattern"] = (operator.add, torch.nn.Conv2d, MatchAllNode) + else: + conv_add_config["pattern_complex_format"] = ( + operator.add, + torch.nn.Conv2d, + MatchAllNode, + ) + m = M().eval() modified_backend_config_dict = copy.deepcopy(self.trt_backend_config_dict) modified_backend_config_dict["configs"].insert(0, conv_add_config) @@ -764,10 +772,6 @@ def forward(self, x): } conv_add_config = { - "pattern_complex_format": ( - torch.nn.ReLU, - (operator.add, torch.nn.Conv2d, MatchAllNode), - ), "observation_type": ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT, "dtype_configs": [ weighted_op_quint8_dtype_config, @@ -776,6 +780,17 @@ def forward(self, x): # "reference_quantized_module_for_root": torch.nn.quantized._reference.Conv2d, } + if torch.__version__.startswith("1"): + conv_add_config["pattern"] = ( + torch.nn.ReLU, + (operator.add, torch.nn.Conv2d, MatchAllNode), + ) + else: + conv_add_config["pattern_complex_format"] = ( + torch.nn.ReLU, + (operator.add, torch.nn.Conv2d, MatchAllNode), + ) + conv_config = { "pattern": torch.nn.Conv2d, "observation_type": ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT, diff --git a/py/torch_tensorrt/fx/tracer/dispatch_tracer/aten_tracer.py b/py/torch_tensorrt/fx/tracer/dispatch_tracer/aten_tracer.py index b35c6958a8..2a252bd965 100644 --- a/py/torch_tensorrt/fx/tracer/dispatch_tracer/aten_tracer.py +++ b/py/torch_tensorrt/fx/tracer/dispatch_tracer/aten_tracer.py @@ -4,8 +4,10 @@ from typing import Any, Callable, Dict, Generator, List, Optional, Set, Tuple, Union import torch -import torch._dynamo as torchdynamo -from torch._dynamo.guards import Guard + +if not torch.__version__.startswith("1"): + import torch._dynamo as torchdynamo + from torch.fx.passes.infra.pass_base import PassResult from torch_tensorrt.fx.passes.lower_basic_pass_aten import ( @@ -96,12 +98,17 @@ def dynamo_trace( aten_graph: bool, tracing_mode: str = "real", dynamo_config: Optional[DynamoConfig] = None, -) -> Tuple[torch.fx.GraphModule, Set[Guard]]: +) -> Tuple[torch.fx.GraphModule, Set]: """ TODO: Once we fully migrate to torchdynamo frontend, we will remove this config option alltogether. For now, it helps with quick experiments with playing around with TorchDynamo """ + if torch.__version__.startswith("1"): + raise ValueError( + f"The aten tracer requires Torch version >= 2.0. Detected version {torch.__version__}" + ) + if dynamo_config is None: dynamo_config = DynamoConfig() with using_config(dynamo_config), setting_python_recursive_limit(2000):
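
Note on the version gating used throughout this patch: the quantization test refactor and the `aten_tracer` guard both branch on `torch.__version__.startswith("1")`, because Torch 1.13.1 expects the complex conv-add fusion entry under the "pattern" key while Torch 2.0+ renamed it to "pattern_complex_format", and `torch._dynamo` only exists on 2.0+. The following standalone Python sketch is illustrative only and is not part of the patch; the helper name `uses_legacy_torch` is hypothetical, the config is trimmed to the pattern key, and the `MatchAllNode` import path is an assumption (the test file may import it from elsewhere).

import operator

import torch
# Import path assumed; MatchAllNode is the wildcard node used in fusion patterns.
from torch.ao.quantization.utils import MatchAllNode


def uses_legacy_torch() -> bool:
    # Hypothetical helper: Torch 1.x wheels report versions such as "1.13.1+cu117".
    return torch.__version__.startswith("1")


# Register the conv-add fusion entry under whichever key the installed
# Torch release expects (trimmed to the pattern key for brevity).
conv_add_config = {}
pattern_key = "pattern" if uses_legacy_torch() else "pattern_complex_format"
conv_add_config[pattern_key] = (operator.add, torch.nn.Conv2d, MatchAllNode)

# The aten tracer applies the same gate: import torch._dynamo only on
# Torch 2.0+, and fail fast if dynamo tracing is requested on a 1.x build.
if not uses_legacy_torch():
    import torch._dynamo as torchdynamo  # noqa: F401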