From d6ea0d56368d348d9035acc311a44626fa96e979 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 17 Aug 2022 09:10:01 -0700 Subject: [PATCH 01/31] feat: Jetson specific workspace file Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- py/setup.py | 17 +++-- toolchains/BUILD | 9 +++ toolchains/jp_workspaces/WORKSPACE.jp50 | 96 +++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 toolchains/jp_workspaces/WORKSPACE.jp50 diff --git a/py/setup.py b/py/setup.py index fc7019ce31..88626a9e04 100644 --- a/py/setup.py +++ b/py/setup.py @@ -72,12 +72,18 @@ def get_git_revision_short_hash() -> str: elif version == "4.6": JETPACK_VERSION = "4.6" elif version == "5.0": - JETPACK_VERSION = "4.6" + JETPACK_VERSION = "5.0" + if not JETPACK_VERSION: warnings.warn( - "Assuming jetpack version to be 4.6 or greater, if not use the --jetpack-version option" + "Assuming jetpack version to be 5.0, if not use the --jetpack-version option" + ) + JETPACK_VERSION = "5.0" + + if not CXX11_ABI: + warnings.warn( + "Jetson platform detected but did not see --use-cxx11-abi option, if using a pytorch distribution provided by NVIDIA include this flag" ) - JETPACK_VERSION = "4.6" def which(program): @@ -128,7 +134,10 @@ def build_libtorchtrt_pre_cxx11_abi(develop=True, use_dist_dir=True, cxx11_abi=F print("Jetpack version: 4.5") elif JETPACK_VERSION == "4.6": cmd.append("--platforms=//toolchains:jetpack_4.6") - print("Jetpack version: >=4.6") + print("Jetpack version: 4.6") + elif JETPACK_VERSION == "5.0": + cmd.append("--platforms=//toolchains:jetpack_5.0") + print("Jetpack version: 5.0") if CI_RELEASE: cmd.append("--platforms=//toolchains:ci_rhel_x86_64_linux") diff --git a/toolchains/BUILD b/toolchains/BUILD index 117cdcf498..aa6486d075 100644 --- a/toolchains/BUILD +++ b/toolchains/BUILD @@ -26,6 +26,15 @@ platform( ], ) +platform( + name = "jetpack_5.0", + constraint_values = [ + "@platforms//os:linux", + "@platforms//cpu:aarch64", + "@//toolchains/jetpack:4.6", + ], +) + platform( name = "ci_rhel_x86_64_linux", constraint_values = [ diff --git a/toolchains/jp_workspaces/WORKSPACE.jp50 b/toolchains/jp_workspaces/WORKSPACE.jp50 new file mode 100644 index 0000000000..a5c054dd23 --- /dev/null +++ b/toolchains/jp_workspaces/WORKSPACE.jp50 @@ -0,0 +1,96 @@ +workspace(name = "Torch-TensorRT") + +load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +http_archive( + name = "rules_python", + sha256 = "778197e26c5fbeb07ac2a2c5ae405b30f6cb7ad1f5510ea6fdac03bded96cc6f", + url = "https://github.com/bazelbuild/rules_python/releases/download/0.2.0/rules_python-0.2.0.tar.gz", +) + +load("@rules_python//python:pip.bzl", "pip_install") + +http_archive( + name = "rules_pkg", + sha256 = "038f1caa773a7e35b3663865ffb003169c6a71dc995e39bf4815792f385d837d", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/rules_pkg/releases/download/0.4.0/rules_pkg-0.4.0.tar.gz", + "https://github.com/bazelbuild/rules_pkg/releases/download/0.4.0/rules_pkg-0.4.0.tar.gz", + ], +) + +load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies") + +rules_pkg_dependencies() + +git_repository( + name = "googletest", + commit = "703bd9caab50b139428cea1aaff9974ebee5742e", + remote = "https://github.com/google/googletest", + shallow_since = "1570114335 -0400", +) + +# External dependency for torch_tensorrt if you already have precompiled binaries. 
+local_repository( + name = "torch_tensorrt", + path = "/opt/conda/lib/python3.8/site-packages/torch_tensorrt", +) + +# CUDA should be installed on the system locally +new_local_repository( + name = "cuda", + build_file = "@//third_party/cuda:BUILD", + path = "/usr/local/cuda-11.4/", +) + +new_local_repository( + name = "cublas", + build_file = "@//third_party/cublas:BUILD", + path = "/usr", +) + +#################################################################################### +# Locally installed dependencies (use in cases of custom dependencies or aarch64) +#################################################################################### + +# NOTE: In the case you are using just the pre-cxx11-abi path or just the cxx11 abi path +# with your local libtorch, just point deps at the same path to satisfy bazel. + +# NOTE: NVIDIA's aarch64 PyTorch (python) wheel file uses the CXX11 ABI unlike PyTorch's standard +# x86_64 python distribution. If using NVIDIA's version just point to the root of the package +# for both versions here and do not use --config=pre-cxx11-abi + +new_local_repository( + name = "libtorch", + path = "/usr/local/lib/python3.8/dist-packages/torch", + build_file = "third_party/libtorch/BUILD" +) + +# NOTE: Unused on aarch64-jetson with NVIDIA provided PyTorch distribu†ion +new_local_repository( + name = "libtorch_pre_cxx11_abi", + path = "/usr/local/lib/python3.8/dist-packages/torch", + build_file = "third_party/libtorch/BUILD" +) + +new_local_repository( + name = "cudnn", + path = "/usr/", + build_file = "@//third_party/cudnn/local:BUILD" +) + +new_local_repository( + name = "tensorrt", + path = "/usr/", + build_file = "@//third_party/tensorrt/local:BUILD" +) + +######################################################################### +# Development Dependencies (optional - comment out on aarch64) +######################################################################### + +pip_install( + name = "devtools_deps", + requirements = "//:requirements-dev.txt", +) From 7f60b18a0496e626fdf57160036df0a79a39efe7 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 17 Aug 2022 09:17:14 -0700 Subject: [PATCH 02/31] docs: Update jetson instructions Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- docsrc/tutorials/installation.rst | 70 ++++--------------------------- 1 file changed, 9 insertions(+), 61 deletions(-) diff --git a/docsrc/tutorials/installation.rst b/docsrc/tutorials/installation.rst index ed5dea5df3..937dd1dde9 100644 --- a/docsrc/tutorials/installation.rst +++ b/docsrc/tutorials/installation.rst @@ -303,54 +303,13 @@ Enviorment Setup To build natively on aarch64-linux-gnu platform, configure the ``WORKSPACE`` with local available dependencies. -1. Disable the rules with ``http_archive`` for x86_64 by commenting the following rules: - -.. 
code-block:: shell - - #http_archive( - # name = "libtorch", - # build_file = "@//third_party/libtorch:BUILD", - # strip_prefix = "libtorch", - # urls = ["https://download.pytorch.org/libtorch/cu102/libtorch-cxx11-abi-shared-with-deps-1.5.1.zip"], - # sha256 = "cf0691493d05062fe3239cf76773bae4c5124f4b039050dbdd291c652af3ab2a" - #) - - #http_archive( - # name = "libtorch_pre_cxx11_abi", - # build_file = "@//third_party/libtorch:BUILD", - # strip_prefix = "libtorch", - # sha256 = "818977576572eadaf62c80434a25afe44dbaa32ebda3a0919e389dcbe74f8656", - # urls = ["https://download.pytorch.org/libtorch/cu102/libtorch-shared-with-deps-1.5.1.zip"], - #) - - # Download these tarballs manually from the NVIDIA website - # Either place them in the distdir directory in third_party and use the --distdir flag - # or modify the urls to "file:////.tar.gz - - #http_archive( - # name = "cudnn", - # urls = ["https://developer.nvidia.com/compute/machine-learning/cudnn/secure/8.0.1.13/10.2_20200626/cudnn-10.2-linux-x64-v8.0.1.13.tgz"], - # build_file = "@//third_party/cudnn/archive:BUILD", - # sha256 = "0c106ec84f199a0fbcf1199010166986da732f9b0907768c9ac5ea5b120772db", - # strip_prefix = "cuda" - #) - - #http_archive( - # name = "tensorrt", - # urls = ["https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/7.1/tars/TensorRT-7.1.3.4.Ubuntu-18.04.x86_64-gnu.cuda-10.2.cudnn8.0.tar.gz"], - # build_file = "@//third_party/tensorrt/archive:BUILD", - # sha256 = "9205bed204e2ae7aafd2e01cce0f21309e281e18d5bfd7172ef8541771539d41", - # strip_prefix = "TensorRT-7.1.3.4" - #) - - NOTE: You may also need to configure the CUDA version to 10.2 by setting the path for the cuda new_local_repository - +1. Replace ``WORKSPACE`` with the corresponding WORKSPACE file in ``//toolchains/jp_workspaces`` 2. Configure the correct paths to directory roots containing local dependencies in the ``new_local_repository`` rules: NOTE: If you installed PyTorch using a pip package, the correct path is the path to the root of the python torch package. - In the case that you installed with ``sudo pip install`` this will be ``/usr/local/lib/python3.6/dist-packages/torch``. - In the case you installed with ``pip install --user`` this will be ``$HOME/.local/lib/python3.6/site-packages/torch``. + In the case that you installed with ``sudo pip install`` this will be ``/usr/local/lib/python3.8/dist-packages/torch``. + In the case you installed with ``pip install --user`` this will be ``$HOME/.local/lib/python3.8/site-packages/torch``. In the case you are using NVIDIA compiled pip packages, set the path for both libtorch sources to the same path. This is because unlike PyTorch on x86_64, NVIDIA aarch64 PyTorch uses the CXX11-ABI. 
If you compiled for source using the pre_cxx11_abi and only would like to @@ -360,27 +319,16 @@ use that library, set the paths to the same path but when you compile make sure new_local_repository( name = "libtorch", - path = "/usr/local/lib/python3.6/dist-packages/torch", + path = "/usr/local/lib/python3.8/dist-packages/torch", build_file = "third_party/libtorch/BUILD" ) new_local_repository( name = "libtorch_pre_cxx11_abi", - path = "/usr/local/lib/python3.6/dist-packages/torch", + path = "/usr/local/lib/python3.8/dist-packages/torch", build_file = "third_party/libtorch/BUILD" ) - new_local_repository( - name = "cudnn", - path = "/usr/", - build_file = "@//third_party/cudnn/local:BUILD" - ) - - new_local_repository( - name = "tensorrt", - path = "/usr/", - build_file = "@//third_party/tensorrt/local:BUILD" - ) Compile C++ Library and Compiler CLI ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -389,19 +337,19 @@ Compile C++ Library and Compiler CLI .. code-block:: shell - --platforms //toolchains:jetpack_4.x + --platforms //toolchains:jetpack_x.x Compile Torch-TensorRT library using bazel command: .. code-block:: shell - bazel build //:libtorchtrt --platforms //toolchains:jetpack_4.6 + bazel build //:libtorchtrt --platforms //toolchains:jetpack_5.0 Compile Python API ^^^^^^^^^^^^^^^^^^^^ - NOTE: Due to shifting dependencies locations between Jetpack 4.5 and Jetpack 4.6 there is now a flag for ``setup.py`` which sets the jetpack version (default: 4.6) + NOTE: Due to shifting dependencies locations between Jetpack 4.5 and newer Jetpack verisons there is now a flag for ``setup.py`` which sets the jetpack version (default: 5.0) Compile the Python API using the following command from the ``//py`` directory: @@ -411,4 +359,4 @@ Compile the Python API using the following command from the ``//py`` directory: If you have a build of PyTorch that uses Pre-CXX11 ABI drop the ``--use-cxx11-abi`` flag -If you are building for Jetpack 4.5 add the ``--jetpack-version 4.5`` flag +If you are building for Jetpack 4.5 add the ``--jetpack-version 5.0`` flag From 61adecfcbf4b62c2a4104d9d3103cb19617b1ed1 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 17 Aug 2022 15:42:38 -0700 Subject: [PATCH 03/31] docs: Update docgen task to not require a special container Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- .github/workflows/docgen.yml | 2 +- docsrc/WORKSPACE.docs | 0 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 docsrc/WORKSPACE.docs diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index cf74831e74..c1d6f0944c 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -47,4 +47,4 @@ jobs: file_pattern: docs/ commit_user_name: Torch-TensorRT Github Bot commit_user_email: torch-tensorrt.github.bot@nvidia.com - commit_author: Torch-TensorRT Github Bot + commit_author: Torch-TensorRT Github Bot \ No newline at end of file diff --git a/docsrc/WORKSPACE.docs b/docsrc/WORKSPACE.docs new file mode 100644 index 0000000000..e69de29bb2 From cf547928fc32d628f0c0f2d7faf0963b5fb5b019 Mon Sep 17 00:00:00 2001 From: Michael Feliz Date: Tue, 14 Jun 2022 14:52:45 -0700 Subject: [PATCH 04/31] Checkpoint, initial test and implementation --- .../converters/impl/constant_pad.cpp | 13 +++-------- .../converters/test_constant_pad.cpp | 23 +++++++++++++++++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/core/conversion/converters/impl/constant_pad.cpp b/core/conversion/converters/impl/constant_pad.cpp index 679a23f875..670687cf1d 100644 --- 
a/core/conversion/converters/impl/constant_pad.cpp +++ b/core/conversion/converters/impl/constant_pad.cpp @@ -21,7 +21,8 @@ auto constant_pad_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns auto padding = args[1].unwrapToIntList().vec(); int64_t padSize = padding.size(); auto value = args[2].unwrapToScalar().to(); - + at::Tensor value_tensor = torch::tensor(value, util::TRTDataTypeToScalarType(in->getType())); + auto valueTensor = tensor_to_const(ctx, value_tensor); TORCHTRT_CHECK(padSize % 2 == 0, "Length of pad must be even but instead it equals " << padSize); int64_t l_pad = padSize / 2; @@ -55,8 +56,6 @@ auto constant_pad_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns auto fill_layer = ctx->net->addFill(nvinfer1::Dims{1, {1}}, nvinfer1::FillOperation::kLINSPACE); auto shape_gather_out = ctx->net->addShape(*left_gather_out)->getOutput(0); fill_layer->setInput(0, *shape_gather_out); - at::Tensor value_tensor = torch::tensor(value, torch::kFloat32); - auto valueTensor = tensor_to_const(ctx, value_tensor); fill_layer->setInput(1, *valueTensor); at::Tensor delta_tensor = torch::zeros(inRank); auto deltaTensor = tensor_to_const(ctx, delta_tensor); @@ -69,8 +68,6 @@ auto constant_pad_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns } else { inDims.d[axis] = padding[padding_index]; auto fill_layer = ctx->net->addFill(inDims, nvinfer1::FillOperation::kLINSPACE); - at::Tensor value_tensor = torch::tensor(value, torch::kFloat32); - auto valueTensor = tensor_to_const(ctx, value_tensor); fill_layer->setInput(1, *valueTensor); at::Tensor delta_tensor = torch::zeros(inRank); auto deltaTensor = tensor_to_const(ctx, delta_tensor); @@ -111,9 +108,7 @@ auto constant_pad_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns // fill the right_gather_out with value auto fill_layer = ctx->net->addFill(nvinfer1::Dims{1, {1}}, nvinfer1::FillOperation::kLINSPACE); auto shape_gather_out = ctx->net->addShape(*right_gather_out)->getOutput(0); - fill_layer->setInput(0, *shape_gather_out); - at::Tensor value_tensor = torch::tensor(value, torch::kFloat32); - auto valueTensor = tensor_to_const(ctx, value_tensor); + fill_layer->setInput(0, *shape_gather_out); fill_layer->setInput(1, *valueTensor); at::Tensor delta_tensor = torch::zeros(inRank); auto deltaTensor = tensor_to_const(ctx, delta_tensor); @@ -126,8 +121,6 @@ auto constant_pad_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns } else { inDims.d[axis] = padding[padding_index + 1]; auto fill_layer = ctx->net->addFill(inDims, nvinfer1::FillOperation::kLINSPACE); - at::Tensor value_tensor = torch::tensor(value, torch::kFloat32); - auto valueTensor = tensor_to_const(ctx, value_tensor); fill_layer->setInput(1, *valueTensor); at::Tensor delta_tensor = torch::zeros(inRank); auto deltaTensor = tensor_to_const(ctx, delta_tensor); diff --git a/tests/core/conversion/converters/test_constant_pad.cpp b/tests/core/conversion/converters/test_constant_pad.cpp index 9b37be4352..c5f0bd8a31 100644 --- a/tests/core/conversion/converters/test_constant_pad.cpp +++ b/tests/core/conversion/converters/test_constant_pad.cpp @@ -28,6 +28,29 @@ TEST(Converters, ATenConstantPad1dTensorConvertsCorrectly) { torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); } +TEST(Converters, ATenConstantPad1dIntTensorConvertsCorrectly) { + const auto graph = R"IR( + graph(%0 : Tensor): + %1 : int[] = prim::Constant[value=[2, 3]]() + %2 : Scalar = prim::Constant[value=2]() + %3 : Tensor 
= aten::constant_pad_nd(%0, %1, %2) + return (%3))IR"; + + auto g = std::make_shared(); + torch::jit::parseIR(graph, g.get()); + + auto in1 = at::randint(1, 10, {1, 3, 4}, {at::kCUDA}).toType(at::kInt); + + auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); + auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1}); + + params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); + auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1}); + + ASSERT_TRUE( + torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); +} + TEST(Converters, ATenConstantPad1dRightZeroTensorConvertsCorrectly) { const auto graph = R"IR( graph(%0 : Tensor): From df240790d622b6c2d4d402d870c65fe66eba7396 Mon Sep 17 00:00:00 2001 From: Michael Feliz Date: Tue, 14 Jun 2022 17:36:17 -0700 Subject: [PATCH 05/31] Fix error in layer conversion caused by zero/ones tensors of wrong type --- core/conversion/converters/impl/constant_pad.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/conversion/converters/impl/constant_pad.cpp b/core/conversion/converters/impl/constant_pad.cpp index 670687cf1d..da2490eab1 100644 --- a/core/conversion/converters/impl/constant_pad.cpp +++ b/core/conversion/converters/impl/constant_pad.cpp @@ -57,7 +57,7 @@ auto constant_pad_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns auto shape_gather_out = ctx->net->addShape(*left_gather_out)->getOutput(0); fill_layer->setInput(0, *shape_gather_out); fill_layer->setInput(1, *valueTensor); - at::Tensor delta_tensor = torch::zeros(inRank); + at::Tensor delta_tensor = torch::zeros(inRank, util::TRTDataTypeToScalarType(in->getType())); auto deltaTensor = tensor_to_const(ctx, delta_tensor); fill_layer->setInput(2, *deltaTensor); auto padTensor = fill_layer->getOutput(0); @@ -69,7 +69,7 @@ auto constant_pad_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns inDims.d[axis] = padding[padding_index]; auto fill_layer = ctx->net->addFill(inDims, nvinfer1::FillOperation::kLINSPACE); fill_layer->setInput(1, *valueTensor); - at::Tensor delta_tensor = torch::zeros(inRank); + at::Tensor delta_tensor = torch::zeros(inRank, util::TRTDataTypeToScalarType(in->getType())); auto deltaTensor = tensor_to_const(ctx, delta_tensor); fill_layer->setInput(2, *deltaTensor); auto padTensor = fill_layer->getOutput(0); @@ -110,7 +110,7 @@ auto constant_pad_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns auto shape_gather_out = ctx->net->addShape(*right_gather_out)->getOutput(0); fill_layer->setInput(0, *shape_gather_out); fill_layer->setInput(1, *valueTensor); - at::Tensor delta_tensor = torch::zeros(inRank); + at::Tensor delta_tensor = torch::zeros(inRank, util::TRTDataTypeToScalarType(in->getType())); auto deltaTensor = tensor_to_const(ctx, delta_tensor); fill_layer->setInput(2, *deltaTensor); auto padTensor = fill_layer->getOutput(0); @@ -122,7 +122,7 @@ auto constant_pad_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns inDims.d[axis] = padding[padding_index + 1]; auto fill_layer = ctx->net->addFill(inDims, nvinfer1::FillOperation::kLINSPACE); fill_layer->setInput(1, *valueTensor); - at::Tensor delta_tensor = torch::zeros(inRank); + at::Tensor delta_tensor = torch::zeros(inRank, util::TRTDataTypeToScalarType(in->getType())); auto deltaTensor = tensor_to_const(ctx, delta_tensor); fill_layer->setInput(2, *deltaTensor); auto padTensor = fill_layer->getOutput(0); From 
6aee08451e9e4b776cd6f3cea629d8fd4571ec7d Mon Sep 17 00:00:00 2001 From: Michael Feliz Date: Thu, 16 Jun 2022 11:38:36 -0700 Subject: [PATCH 06/31] Sort input values in registerSegmentsOutputs to resolve nondeterminism --- core/partitioning/partitioning.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/partitioning/partitioning.cpp b/core/partitioning/partitioning.cpp index f083f6fbe5..892938d82a 100644 --- a/core/partitioning/partitioning.cpp +++ b/core/partitioning/partitioning.cpp @@ -178,7 +178,8 @@ void resolveTRTNonTensorInputs(PartitionedGraph& segmented_blocks) { void registerSegmentsOutputs(PartitionedGraph& segmented_blocks, torch::jit::Block* block) { // find the corresponding raw values in original global graph for this segmented block's inputs/outputs - std::set input_values; + auto cmp = [](torch::jit::Value* a, torch::jit::Value* b) { return a->unique() < b->unique();}; + std::set input_values(cmp); for (auto& seg_block : segmented_blocks) { for (auto& input : seg_block.raw_inputs()) { input_values.insert(input); From bf6645f0f1422d1833e5de695a6b1ca181a4b030 Mon Sep 17 00:00:00 2001 From: Michael Feliz Date: Thu, 18 Aug 2022 10:07:08 -0700 Subject: [PATCH 07/31] lint --- core/partitioning/partitioning.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/partitioning/partitioning.cpp b/core/partitioning/partitioning.cpp index 892938d82a..c329b33ef6 100644 --- a/core/partitioning/partitioning.cpp +++ b/core/partitioning/partitioning.cpp @@ -178,7 +178,7 @@ void resolveTRTNonTensorInputs(PartitionedGraph& segmented_blocks) { void registerSegmentsOutputs(PartitionedGraph& segmented_blocks, torch::jit::Block* block) { // find the corresponding raw values in original global graph for this segmented block's inputs/outputs - auto cmp = [](torch::jit::Value* a, torch::jit::Value* b) { return a->unique() < b->unique();}; + auto cmp = [](torch::jit::Value* a, torch::jit::Value* b) { return a->unique() < b->unique(); }; std::set input_values(cmp); for (auto& seg_block : segmented_blocks) { for (auto& input : seg_block.raw_inputs()) { From 911ab5b9dee788ca1aa668e02e9de8a406e42f82 Mon Sep 17 00:00:00 2001 From: Michael Feliz Date: Thu, 18 Aug 2022 10:13:57 -0700 Subject: [PATCH 08/31] lint --- core/conversion/converters/impl/constant_pad.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/conversion/converters/impl/constant_pad.cpp b/core/conversion/converters/impl/constant_pad.cpp index da2490eab1..6d3f1ab609 100644 --- a/core/conversion/converters/impl/constant_pad.cpp +++ b/core/conversion/converters/impl/constant_pad.cpp @@ -108,7 +108,7 @@ auto constant_pad_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns // fill the right_gather_out with value auto fill_layer = ctx->net->addFill(nvinfer1::Dims{1, {1}}, nvinfer1::FillOperation::kLINSPACE); auto shape_gather_out = ctx->net->addShape(*right_gather_out)->getOutput(0); - fill_layer->setInput(0, *shape_gather_out); + fill_layer->setInput(0, *shape_gather_out); fill_layer->setInput(1, *valueTensor); at::Tensor delta_tensor = torch::zeros(inRank, util::TRTDataTypeToScalarType(in->getType())); auto deltaTensor = tensor_to_const(ctx, delta_tensor); From 6b77b721f140bb465477582c557284de53e62453 Mon Sep 17 00:00:00 2001 From: Michael Feliz Date: Thu, 30 Jun 2022 17:17:59 -0700 Subject: [PATCH 09/31] feat: Add support for aten::square --- core/conversion/converters/impl/element_wise.cpp | 12 ++++++++++++ .../core/conversion/converters/test_element_wise.cpp | 8 
++++++++ 2 files changed, 20 insertions(+) diff --git a/core/conversion/converters/impl/element_wise.cpp b/core/conversion/converters/impl/element_wise.cpp index 32c7050289..8b08a5505a 100644 --- a/core/conversion/converters/impl/element_wise.cpp +++ b/core/conversion/converters/impl/element_wise.cpp @@ -390,6 +390,18 @@ auto element_wise_registrations TORCHTRT_UNUSED = LOG_DEBUG("Output tensor shape: " << out->getDimensions()); return true; }}) + .pattern( + {"aten::square(Tensor self) -> Tensor", + [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { + auto self = args[0].ITensorOrFreeze(ctx); + auto mul = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kPROD, self, self, util::node_info(n)); + TORCHTRT_CHECK(mul, "Unable to create mul layer from node: " << *n); + + mul->setName(util::node_info(n).c_str()); + auto out = ctx->AssociateValueAndTensor(n->outputs()[0], mul->getOutput(0)); + LOG_DEBUG("Output tensor shape: " << out->getDimensions()); + return true; + }}) .pattern( {"aten::mul.Tensor(Tensor self, Tensor other) -> Tensor", [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { diff --git a/tests/core/conversion/converters/test_element_wise.cpp b/tests/core/conversion/converters/test_element_wise.cpp index 3ecfdb2019..6b1c26bbab 100644 --- a/tests/core/conversion/converters/test_element_wise.cpp +++ b/tests/core/conversion/converters/test_element_wise.cpp @@ -145,6 +145,14 @@ TEST(Converters, ATenMulConvertsCorrectly) { pointwise_test_helper(graph, false, true, {5}, {5}, false, at::kInt, at::kFloat); } +TEST(Converters, ATenSquareConvertsCorrectly) { + const auto graph = R"IR( + graph(%0 : Tensor): + %1 : Tensor = aten::square(%0) + return (%1))IR"; + pointwise_test_helper(graph, true); +} + TEST(Converters, ATenMulWithScalarConvertsCorrectly) { const auto graph = R"IR( graph(%0 : Tensor): From e74dbd7bbc7d6107976239572240d63be9d5907e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 18 Aug 2022 19:56:33 +0000 Subject: [PATCH 10/31] chore(deps): bump @actions/core in /.github/actions/assigner Bumps [@actions/core](https://github.com/actions/toolkit/tree/HEAD/packages/core) from 1.8.2 to 1.9.1. - [Release notes](https://github.com/actions/toolkit/releases) - [Changelog](https://github.com/actions/toolkit/blob/main/packages/core/RELEASES.md) - [Commits](https://github.com/actions/toolkit/commits/HEAD/packages/core) --- updated-dependencies: - dependency-name: "@actions/core" dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- .github/actions/assigner/package-lock.json | 33 ++++++++++++++++------ .github/actions/assigner/package.json | 2 +- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/.github/actions/assigner/package-lock.json b/.github/actions/assigner/package-lock.json index 6807c4ed4e..9d077a8546 100644 --- a/.github/actions/assigner/package-lock.json +++ b/.github/actions/assigner/package-lock.json @@ -9,7 +9,7 @@ "version": "1.0.0", "license": "BSD-3-Clause", "dependencies": { - "@actions/core": "^1.8.2", + "@actions/core": "^1.9.1", "@actions/github": "^5.0.3", "fs": "^0.0.1-security", "js-yaml": "^4.1.0" @@ -22,11 +22,12 @@ } }, "node_modules/@actions/core": { - "version": "1.8.2", - "resolved": "https://registry.npmjs.org/@actions/core/-/core-1.8.2.tgz", - "integrity": "sha512-FXcBL7nyik8K5ODeCKlxi+vts7torOkoDAKfeh61EAkAy1HAvwn9uVzZBY0f15YcQTcZZ2/iSGBFHEuioZWfDA==", + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/@actions/core/-/core-1.9.1.tgz", + "integrity": "sha512-5ad+U2YGrmmiw6du20AQW5XuWo7UKN2052FjSV7MX+Wfjf8sCqcsZe62NfgHys4QI4/Y+vQvLKYL8jWtA1ZBTA==", "dependencies": { - "@actions/http-client": "^2.0.1" + "@actions/http-client": "^2.0.1", + "uuid": "^8.3.2" } }, "node_modules/@actions/github": { @@ -276,6 +277,14 @@ "resolved": "https://registry.npmjs.org/universal-user-agent/-/universal-user-agent-6.0.0.tgz", "integrity": "sha512-isyNax3wXoKaulPDZWHQqbmIx1k2tb9fb3GGDBRxCscfYV2Ch7WxPArBsFEG8s/safwXTT7H4QGhaIkTp9447w==" }, + "node_modules/uuid": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", + "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==", + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", @@ -298,11 +307,12 @@ }, "dependencies": { "@actions/core": { - "version": "1.8.2", - "resolved": "https://registry.npmjs.org/@actions/core/-/core-1.8.2.tgz", - "integrity": "sha512-FXcBL7nyik8K5ODeCKlxi+vts7torOkoDAKfeh61EAkAy1HAvwn9uVzZBY0f15YcQTcZZ2/iSGBFHEuioZWfDA==", + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/@actions/core/-/core-1.9.1.tgz", + "integrity": "sha512-5ad+U2YGrmmiw6du20AQW5XuWo7UKN2052FjSV7MX+Wfjf8sCqcsZe62NfgHys4QI4/Y+vQvLKYL8jWtA1ZBTA==", "requires": { - "@actions/http-client": "^2.0.1" + "@actions/http-client": "^2.0.1", + "uuid": "^8.3.2" } }, "@actions/github": { @@ -507,6 +517,11 @@ "resolved": "https://registry.npmjs.org/universal-user-agent/-/universal-user-agent-6.0.0.tgz", "integrity": "sha512-isyNax3wXoKaulPDZWHQqbmIx1k2tb9fb3GGDBRxCscfYV2Ch7WxPArBsFEG8s/safwXTT7H4QGhaIkTp9447w==" }, + "uuid": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", + "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==" + }, "webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", diff --git a/.github/actions/assigner/package.json b/.github/actions/assigner/package.json index f6fa97add8..306ac4b4f6 100644 --- a/.github/actions/assigner/package.json +++ b/.github/actions/assigner/package.json @@ -19,7 +19,7 @@ "license": "BSD-3-Clause", "repository": "https://www.github.com/pytorch/TensorRT", "dependencies": { - "@actions/core": "^1.8.2", + "@actions/core": "^1.9.1", "@actions/github": "^5.0.3", 
"fs": "^0.0.1-security", "js-yaml": "^4.1.0" From 0591a8d137473417f57e27402bbbfd142d5ef397 Mon Sep 17 00:00:00 2001 From: Bo Wang Date: Thu, 18 Aug 2022 14:49:14 -0700 Subject: [PATCH 11/31] fix: fix misleading skipping partitioning msg Signed-off-by: Bo Wang --- core/compiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/compiler.cpp b/core/compiler.cpp index 7b58dbb2c1..898f2f1295 100644 --- a/core/compiler.cpp +++ b/core/compiler.cpp @@ -426,7 +426,7 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg) auto outputIsCollection = conversion::OutputIsCollection(g->block()); if (cfg.partition_info.enabled && (cfg.lower_info.forced_fallback_modules.size() == 0 && - cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible)) { + cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible) && !outputIsCollection) { LOG_INFO("Skipping partitioning since model is fully supported"); } From b6b690ca0d8d3a4c8c34a27eebf96ef0677e16ec Mon Sep 17 00:00:00 2001 From: Wei Date: Thu, 18 Aug 2022 18:10:31 -0700 Subject: [PATCH 12/31] Changes done internally at Facebook (#1288) bd46e8f292bf68fe6b87d2d5d206c89fda79a746 Shirong Wu Disable group ln fuse pass 6ce1d3bc19d75b266e99355c96daeff7054dcbf8 Wei Wei [fx2trt] set logging level to INFO at fx root 9d552dc3f69db9e4a249f80ef00803a9413e5d38 Wei Wei [fx2trt] change OSS method lower_to_trt() to compile() Co-authored-by: wwei6 --- .../getting_started_with_fx_path.rst | 6 +-- examples/fx/lower_example.py | 4 +- examples/fx/torchdynamo_example.py | 4 +- py/torch_tensorrt/_compile.py | 4 +- py/torch_tensorrt/fx/__init__.py | 4 ++ py/torch_tensorrt/fx/lower.py | 40 ++++++++++--------- py/torch_tensorrt/fx/lower_setting.py | 4 ++ .../fx/test/converters/acc_op/test_type_as.py | 3 +- .../fx/test/passes/test_graph_opts.py | 2 - .../fx/test/trt_lower/test_diagnostics.py | 2 - .../fx/tools/trt_profiler_sorted.py | 5 +++ 11 files changed, 46 insertions(+), 32 deletions(-) diff --git a/docsrc/tutorials/getting_started_with_fx_path.rst b/docsrc/tutorials/getting_started_with_fx_path.rst index d54f3d91af..eb39fc0eef 100644 --- a/docsrc/tutorials/getting_started_with_fx_path.rst +++ b/docsrc/tutorials/getting_started_with_fx_path.rst @@ -34,7 +34,7 @@ Torch-TensorRT (FX Path) is in ``Beta`` phase and always recommended to work wit Converting a PyTorch Model to TensorRT Engine --------------------------------------------- -In general, users are welcome to use the ``lower_to_trt()`` to finish the conversion from a model to tensorRT engine. It is a wrapper API that consists of the major steps needed to finish this converison. Please refer to ``lower_example.py`` file in ``examples/fx``. +In general, users are welcome to use the ``compile()`` to finish the conversion from a model to tensorRT engine. It is a wrapper API that consists of the major steps needed to finish this converison. Please refer to ``lower_example.py`` file in ``examples/fx``. In this section, we will go through an example to illustrate the major steps that FX path uses. Users can refer to ``fx2trt_example.py`` file in ``examples/fx``. @@ -60,9 +60,9 @@ symbolically traced variables cannot be used as inputs to control flow This means the model contains dynamic control flow. Please refer to section “Dynamic Control Flow” in `FX guide `_. * **Step 2: Build TensorRT engine** -There are `two different modes `_ for how TensorRT handles batch dimension, explicit batch dimension and implicit batch dimension. 
This mode was used by early versions of TensorRT, and is now deprecated but continues to be supported for backwards compatibility. In explicit batch mode, all dimensions are explicit and can be dynamic, that is their length can change at execution time. Many new features, such as dynamic shapes and loops, are available only in this mode. User can still choose to use implicit batch mode when they set ``explicit_batch_dimension=False`` in ``lower_to_trt()``. We do not recommend to use it since it will lack of support in future TensorRT versions. +There are `two different modes `_ for how TensorRT handles batch dimension, explicit batch dimension and implicit batch dimension. This mode was used by early versions of TensorRT, and is now deprecated but continues to be supported for backwards compatibility. In explicit batch mode, all dimensions are explicit and can be dynamic, that is their length can change at execution time. Many new features, such as dynamic shapes and loops, are available only in this mode. User can still choose to use implicit batch mode when they set ``explicit_batch_dimension=False`` in ``compile()``. We do not recommend to use it since it will lack of support in future TensorRT versions. -Explicit batch is the default mode and it must be set for dynamic shape. For most of vision task, user can choose to enable ``dynamic_batch`` in ``lower_to_trt()`` if they want to get the similar effects as implicit mode where only batch dimension changes. It has some requirements: +Explicit batch is the default mode and it must be set for dynamic shape. For most of vision task, user can choose to enable ``dynamic_batch`` in ``compile()`` if they want to get the similar effects as implicit mode where only batch dimension changes. It has some requirements: 1. Shapes of inputs, outputs and activations are fixed except batch dimension. 2. Inputs, outputs and activations have batch dimension as the major dimension. 3. All the operators in the model do not modify batch dimension (permute, transpose, split, etc.) or compute over batch dimension (sum, softmax, etc.). 
diff --git a/examples/fx/lower_example.py b/examples/fx/lower_example.py index 71f15a2f88..7f3b374f44 100644 --- a/examples/fx/lower_example.py +++ b/examples/fx/lower_example.py @@ -4,7 +4,7 @@ import torch import torchvision -from torch_tensorrt.fx.lower import lower_to_trt +from torch_tensorrt.fx.lower import compile from torch_tensorrt.fx.utils import LowerPrecision @@ -183,7 +183,7 @@ def run_configuration_benchmark( time = benchmark_torch_function(conf.batch_iter, lambda: module(*input)) elif not conf.jit: # Run lowering eager mode benchmark - lowered_module = lower_to_trt( + lowered_module = compile( module, input, max_batch_size=conf.batch_size, diff --git a/examples/fx/torchdynamo_example.py b/examples/fx/torchdynamo_example.py index 6bb93f6d6e..a2e7627800 100644 --- a/examples/fx/torchdynamo_example.py +++ b/examples/fx/torchdynamo_example.py @@ -5,7 +5,7 @@ import torch import torchdynamo import torchvision -from torch_tensorrt.fx.lower import lower_to_trt +from torch_tensorrt.fx.lower import compile from torch_tensorrt.fx.utils import LowerPrecision from torchdynamo.optimizations import backends @@ -197,7 +197,7 @@ def run_configuration_benchmark( if conf.trt: # Run lowering eager mode benchmark - lowered_module = lower_to_trt( + lowered_module = compile( module, input, max_batch_size=conf.batch_size, diff --git a/py/torch_tensorrt/_compile.py b/py/torch_tensorrt/_compile.py index f6487a4402..8b5f235531 100644 --- a/py/torch_tensorrt/_compile.py +++ b/py/torch_tensorrt/_compile.py @@ -7,7 +7,7 @@ from enum import Enum import torch_tensorrt.fx -from torch_tensorrt.fx.lower import lower_to_trt +import torch_tensorrt.fx.lower from torch_tensorrt.fx.utils import LowerPrecision @@ -140,7 +140,7 @@ def compile( else: raise ValueError(f"Precision {enabled_precisions} not supported on FX") - return lower_to_trt( + return torch_tensorrt.fx.lower.compile( module, inputs, lower_precision=lower_precision, diff --git a/py/torch_tensorrt/fx/__init__.py b/py/torch_tensorrt/fx/__init__.py index aeae62d86d..c1c42c446f 100644 --- a/py/torch_tensorrt/fx/__init__.py +++ b/py/torch_tensorrt/fx/__init__.py @@ -1,4 +1,6 @@ from .converters import * # noqa: F403 F401 +import logging + from .converter_registry import ( # noqa CONVERTERS, NO_EXPLICIT_BATCH_DIM_SUPPORT, @@ -9,3 +11,5 @@ from .input_tensor_spec import generate_input_specs, InputTensorSpec # noqa from .lower_setting import LowerSetting # noqa from .trt_module import TRTModule # noqa + +logging.basicConfig(level=logging.INFO) diff --git a/py/torch_tensorrt/fx/lower.py b/py/torch_tensorrt/fx/lower.py index 387b4db841..deeee14178 100644 --- a/py/torch_tensorrt/fx/lower.py +++ b/py/torch_tensorrt/fx/lower.py @@ -25,7 +25,7 @@ Input = Sequence[Any] -def lower_to_trt( +def compile( module: nn.Module, input, max_batch_size: int = 2048, @@ -216,28 +216,32 @@ def create( ) ) - @decorate_method(validate_inference(atol=1e-1, rtol=1e-1)) def __call__( self, module: nn.Module, inputs: Input, additional_inputs: Optional[Input] = None, ) -> nn.Module: - module.eval() - - if ( - self.lower_pass_manager_builder.lower_setting.lower_precision - == LowerPrecision.FP16 - ): - module.half() - inputs = tuple( - x.half() if x is not None and x.dtype == torch.float32 else x - for x in inputs + lower_setting = self.lower_pass_manager_builder.lower_setting + atol = lower_setting.correctness_atol + rtol = lower_setting.correctness_rtol + + @validate_inference(atol=atol, rtol=rtol) + def do_lower(module: nn.Module, inputs: Input) -> nn.Module: + module.eval() + if ( + 
self.lower_pass_manager_builder.lower_setting.lower_precision + == LowerPrecision.FP16 + ): + module.half() + inputs = tuple( + x.half() if x is not None and x.dtype == torch.float32 else x + for x in inputs + ) + pm = self.lower_pass_manager_builder.build_trt_lower_pipeline( + inputs, additional_inputs ) - pm = self.lower_pass_manager_builder.build_trt_lower_pipeline( - inputs, additional_inputs - ) - - lower_result = pm(module) + lower_result = pm(module) + return lower_result - return lower_result + return do_lower(module, inputs) diff --git a/py/torch_tensorrt/fx/lower_setting.py b/py/torch_tensorrt/fx/lower_setting.py index c1d02229e3..b4ad86caee 100644 --- a/py/torch_tensorrt/fx/lower_setting.py +++ b/py/torch_tensorrt/fx/lower_setting.py @@ -70,6 +70,8 @@ class LowerSetting(LowerSettingBasic): dynamic_batch: enable the dynamic shape in TRT with dim=-1 for the 1st dimension. tactic_sources: tactic sources for TensorRT kernel selection. Default to None, meaning all possible tactic sources. + correctness_atol: absolute tolerance for correctness check + correctness_rtol: relative tolerance for correctness check """ input_specs: List[InputTensorSpec] = dc.field(default_factory=list) @@ -90,3 +92,5 @@ class LowerSetting(LowerSettingBasic): opt_profile_replica: int = 1 dynamic_batch: bool = True tactic_sources: Optional[int] = None + correctness_atol: float = 0.1 + correctness_rtol: float = 0.1 diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py index 0bfffd210f..1f3a39d836 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py @@ -1,5 +1,6 @@ -import torch import unittest + +import torch import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from torch.testing._internal.common_utils import run_tests from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec diff --git a/py/torch_tensorrt/fx/test/passes/test_graph_opts.py b/py/torch_tensorrt/fx/test/passes/test_graph_opts.py index 9db4183e64..c91c456eb3 100644 --- a/py/torch_tensorrt/fx/test/passes/test_graph_opts.py +++ b/py/torch_tensorrt/fx/test/passes/test_graph_opts.py @@ -8,8 +8,6 @@ import torch_tensorrt.fx.tracer.acc_tracer.acc_tracer as acc_tracer from torch_tensorrt.fx.passes.graph_opts import common_subexpression_elimination -_LOGGER: logging.Logger = logging.getLogger(__name__) - _LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py b/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py index e23ab5dd81..3ce3b7ade8 100644 --- a/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py +++ b/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py @@ -10,8 +10,6 @@ import torch_tensorrt.fx.diagnostics as diag -_LOGGER: logging.Logger = logging.getLogger(__name__) - _LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/fx/tools/trt_profiler_sorted.py b/py/torch_tensorrt/fx/tools/trt_profiler_sorted.py index 59d2f49042..ac0a02ac1d 100644 --- a/py/torch_tensorrt/fx/tools/trt_profiler_sorted.py +++ b/py/torch_tensorrt/fx/tools/trt_profiler_sorted.py @@ -37,6 +37,11 @@ def profile_trt_module( layer_info = json.loads(trt_mod.get_layer_info()) # pyre-ignore[29] shape_map = {} for layer in layer_info["Layers"]: + # if type is str, it means verbose_profile is off in interpreter.run() + # Theorectically, we can print profiling information without 
shape information + # but we choose to not print profiling information so we can use verbose_profile to control it + if type(layer) is str: + return name = layer["Name"] input_str = ", ".join( [str(x.get("Dimensions", "[]")) for x in layer.get("Inputs", [])] From 3b910caa73aab4e92941e34a26711211f686f643 Mon Sep 17 00:00:00 2001 From: Anurag Dixit Date: Thu, 18 Aug 2022 19:02:21 -0700 Subject: [PATCH 13/31] chore: Truncate long and double for ptq CPP path Signed-off-by: Anurag Dixit --- examples/int8/ptq/main.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/int8/ptq/main.cpp b/examples/int8/ptq/main.cpp index 4cc38148b6..e69227fb5a 100644 --- a/examples/int8/ptq/main.cpp +++ b/examples/int8/ptq/main.cpp @@ -50,6 +50,7 @@ torch::jit::Module compile_int8_model(const std::string& data_dir, torch::jit::M compile_spec.enabled_precisions.insert(torch::kI8); /// Use the TensorRT Entropy Calibrator compile_spec.ptq_calibrator = calibrator; + compile_spec.truncate_long_and_double = true; #ifdef SAVE_ENGINE std::cout << "Compiling graph to save as TRT engine (/tmp/engine_converted_from_jit.trt)" << std::endl; From 149033c529749f7be5bb3368fa694f2267c54a91 Mon Sep 17 00:00:00 2001 From: Anurag Dixit Date: Thu, 18 Aug 2022 19:03:37 -0700 Subject: [PATCH 14/31] chore: Fix dataloader in finetune_qat script Signed-off-by: Anurag Dixit --- examples/int8/training/vgg16/finetune_qat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/int8/training/vgg16/finetune_qat.py b/examples/int8/training/vgg16/finetune_qat.py index 48709d9f8a..6ec20a9a46 100644 --- a/examples/int8/training/vgg16/finetune_qat.py +++ b/examples/int8/training/vgg16/finetune_qat.py @@ -261,7 +261,7 @@ def main(): state = ckpt["state"] data = iter(training_dataloader) - images, _ = data.next() + images, _ = next(data) writer.add_graph(model, images.cuda()) writer.close() From ec2e2bfa9273b998b3df2156301c2e65521ae8d4 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Fri, 19 Aug 2022 13:52:36 -0700 Subject: [PATCH 15/31] docs: Update docgen task Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- .github/workflows/docgen.yml | 51 +++++- docsrc/Makefile | 4 +- docsrc/WORKSPACE.docs | 0 docsrc/{tutorials => cli}/torchtrtc.rst | 0 .../getting_started_with_cpp_api.rst | 6 +- .../getting_started_with_python_api.rst | 17 +- .../installation.rst | 0 docsrc/index.rst | 67 ++++---- docsrc/py_api/fx.rst | 31 ++++ docsrc/py_api/torch_tensorrt.rst | 1 + .../getting_started_with_fx_path.rst | 43 ++--- docsrc/tutorials/notebooks.rst | 154 ++++++++++++++++++ py/requirements.txt | 5 +- py/torch_tensorrt/fx/__init__.py | 1 + 14 files changed, 308 insertions(+), 72 deletions(-) delete mode 100644 docsrc/WORKSPACE.docs rename docsrc/{tutorials => cli}/torchtrtc.rst (100%) rename docsrc/{tutorials => getting_started}/getting_started_with_cpp_api.rst (99%) rename docsrc/{tutorials => getting_started}/getting_started_with_python_api.rst (59%) rename docsrc/{tutorials => getting_started}/installation.rst (100%) create mode 100644 docsrc/py_api/fx.rst create mode 100644 docsrc/tutorials/notebooks.rst diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index c1d6f0944c..147625e51d 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -10,9 +10,9 @@ on: jobs: build-docs: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 container: - image: docker.pkg.github.com/pytorch/tensorrt/docgen:latest + image: nvidia/cuda:11.3.1-devel-ubuntu20.04 credentials: username: ${{ github.actor }} 
password: ${{ secrets.GITHUB_TOKEN }} @@ -22,23 +22,64 @@ jobs: rm -rf /usr/share/dotnet rm -rf /opt/ghc rm -rf "/usr/local/share/boost" + rm -rf /usr/local/cuda/cuda-* + - name: Install Python + run: + apt update + apt install -y gcc git curl wget make zlib1g-dev bzip2 libbz2-dev lzma lzma-dev libreadline-dev libsqlite3-dev libssl-dev libffi-dev doxygen pandoc + mkdir -p /opt/circleci + git clone https://github.com/pyenv/pyenv.git /opt/circleci/.pyenv + export PYENV_ROOT="/opt/circleci/.pyenv" + export PATH="$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH" + pyenv install 3.9.4 + pyenv global 3.9.4 + python3 -m pip install --upgrade pip + python3 -m pip install wheel - uses: actions/checkout@v2 with: ref: ${{github.head_ref}} - name: Get HEAD SHA id: vars run: echo "::set-output name=sha::$(git rev-parse --short HEAD)" + - name: Get Bazel version + id: bazel_info + run: echo "::set-output name=version::$(cat .bazelversion)" + - name: Install Bazel + run: | + wget -q https://github.com/bazelbuild/bazel/releases/download/${{ steps.bazel_info.outputs.version }}/bazel-${{ steps.bazel_info.outputs.version }}-linux-x86_64 -O /usr/bin/bazel + chmod a+x /usr/bin/bazel + - name: Install cudnn + tensorrt + run: | + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin + mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub + apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 536F8F1DE80F6A35 + apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC + add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" + apt-get update + apt-get install -y libcudnn8 libcudnn8-dev + + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub + add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" + apt-get update + + apt-get install -y libnvinfer8 libnvinfer-plugin8 libnvinfer-dev libnvinfer-plugin-dev + - name: Install Torch + run: | + python3 -m pip install -r py/requirements.txt - name: Build Python Package run: | - cp docker/WORKSPACE.docker WORKSPACE + cp toolchains/ci_workspaces/WORKSPACE.x86_64 WORKSPACE cd py - python3 setup.py install + pip install -e . + cd .. - name: Generate New Docs run: | cd docsrc - pip3 install -r requirements.txt + python3 -m pip install -r requirements.txt python3 -c "import torch_tensorrt; print(torch_tensorrt.__version__)" make html + cd .. 
- uses: stefanzweifel/git-auto-commit-action@v4 with: # Required diff --git a/docsrc/Makefile b/docsrc/Makefile index 18c52c578b..0ea6796ed8 100644 --- a/docsrc/Makefile +++ b/docsrc/Makefile @@ -37,8 +37,8 @@ endif rm -rf $(SOURCEDIR)/_tmp html: - mkdir -p $(SOURCEDIR)/_notebooks - cp -r $(SOURCEDIR)/../notebooks/*.ipynb $(SOURCEDIR)/_notebooks +# mkdir -p $(SOURCEDIR)/_notebooks +# cp -r $(SOURCEDIR)/../notebooks/*.ipynb $(SOURCEDIR)/_notebooks @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) mkdir -p $(DESTDIR) cp -r $(BUILDDIR)/html/* $(DESTDIR) diff --git a/docsrc/WORKSPACE.docs b/docsrc/WORKSPACE.docs deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/docsrc/tutorials/torchtrtc.rst b/docsrc/cli/torchtrtc.rst similarity index 100% rename from docsrc/tutorials/torchtrtc.rst rename to docsrc/cli/torchtrtc.rst diff --git a/docsrc/tutorials/getting_started_with_cpp_api.rst b/docsrc/getting_started/getting_started_with_cpp_api.rst similarity index 99% rename from docsrc/tutorials/getting_started_with_cpp_api.rst rename to docsrc/getting_started/getting_started_with_cpp_api.rst index 41b8a448c0..7f7f60a669 100644 --- a/docsrc/tutorials/getting_started_with_cpp_api.rst +++ b/docsrc/getting_started/getting_started_with_cpp_api.rst @@ -1,7 +1,7 @@ -.. _getting_started: +.. _getting_started_cpp: -Getting Started with C++ -======================== +Using Torch-TensorRT in C++ +============================== If you haven't already, acquire a tarball of the library by following the instructions in :ref:`Installation` diff --git a/docsrc/tutorials/getting_started_with_python_api.rst b/docsrc/getting_started/getting_started_with_python_api.rst similarity index 59% rename from docsrc/tutorials/getting_started_with_python_api.rst rename to docsrc/getting_started/getting_started_with_python_api.rst index 62d5d07b80..fece176156 100644 --- a/docsrc/tutorials/getting_started_with_python_api.rst +++ b/docsrc/getting_started/getting_started_with_python_api.rst @@ -3,8 +3,16 @@ Using Torch-TensorRT in Python ******************************* -Torch-TensorRT Python API accepts a ```torch.nn.Module`` as an input. Under the hood, it uses ``torch.jit.script`` to convert the input module into a -TorchScript module. To compile your input ```torch.nn.Module`` with Torch-TensorRT, all you need to do is provide the module and inputs +The Torch-TensorRT Python API supports a number of unique usecases compared to the CLI and C++ APIs which solely support TorchScript compilation. + +Torch-TensorRT Python API can accept a ``torch.nn.Module``, ``torch.jit.ScriptModule``, or ``torch.fx.GraphModule`` as an input. +Depending on what is provided one of the two frontends (TorchScript or FX) will be selected to compile the module. Provided the +module type is supported, users may explicitly set which frontend they would like to use using the ``ir`` flag for ``compile``. +If given a ``torch.nn.Module`` and the ``ir`` flag is set to either ``default`` or ``torchscript`` the module will be run through +``torch.jit.script`` to convert the input module into a TorchScript module. + + +To compile your input ``torch.nn.Module`` with Torch-TensorRT, all you need to do is provide the module and inputs to Torch-TensorRT and you will be returned an optimized TorchScript module to run or add into another PyTorch module. Inputs is a list of ``torch_tensorrt.Input`` classes which define input's shape, datatype and memory format. 
You can also specify settings such as operating precision for the engine or target device. After compilation you can save the module just like any other module @@ -46,6 +54,5 @@ to load in a deployment application. In order to load a TensorRT/TorchScript mod input_data = input_data.to("cuda").half() result = trt_ts_module(input_data) -Torch-TensorRT python API also provides ``torch_tensorrt.ts.compile`` which accepts a TorchScript module as input. -The torchscript module can be obtained via scripting or tracing (refer to :ref:`creating_torchscript_module_in_python`). ``torch_tensorrt.ts.compile`` accepts a Torchscript module -and a list of ``torch_tensorrt.Input`` classes. +Torch-TensorRT Python API also provides ``torch_tensorrt.ts.compile`` which accepts a TorchScript module as input and ``torch_tensorrt.fx.compile`` which accepts a FX GraphModule as input. + diff --git a/docsrc/tutorials/installation.rst b/docsrc/getting_started/installation.rst similarity index 100% rename from docsrc/tutorials/installation.rst rename to docsrc/getting_started/installation.rst diff --git a/docsrc/index.rst b/docsrc/index.rst index b12d6cc1f4..c0de8dd400 100644 --- a/docsrc/index.rst +++ b/docsrc/index.rst @@ -22,46 +22,43 @@ More Information / System Architecture: Getting Started ---------------- * :ref:`installation` -* :ref:`getting_started` +* :ref:`getting_started_with_python_api` +* :ref:`getting_started_cpp` + +.. toctree:: + :caption: Getting Started + :maxdepth: 1 + :hidden: + + getting_started/installation + getting_started/getting_started_with_python_api + getting_started/getting_started_with_cpp_api + + +Tutorials +------------ +* :ref:`creating_a_ts_mod` +* :ref:`getting_started_with_fx` * :ref:`ptq` -* :ref:`torchtrtc` -* :ref:`use_from_pytorch` * :ref:`runtime` -* :ref:`using_dla` * :ref:`serving_torch_tensorrt_with_triton` -* :ref:`user_guide` +* :ref:`use_from_pytorch` +* :ref:`using_dla` +* :ref:`notebooks` .. toctree:: - :caption: Getting Started + :caption: Tutorials :maxdepth: 1 :hidden: - tutorials/installation - tutorials/getting_started_with_cpp_api - tutorials/getting_started_with_python_api tutorials/creating_torchscript_module_in_python + tutorials/getting_started_with_fx_path tutorials/ptq - tutorials/torchtrtc - tutorials/use_from_pytorch tutorials/runtime - tutorials/using_dla tutorials/serving_torch_tensorrt_with_triton - tutorials/getting_started_with_fx_path - -.. toctree:: - :caption: Notebooks - :maxdepth: 1 - :hidden: - - _notebooks/CitriNet-example - _notebooks/dynamic-shapes - _notebooks/EfficientNet-example - _notebooks/Hugging-Face-BERT - _notebooks/lenet-getting-started - _notebooks/Resnet50-example - _notebooks/ssd-object-detection-demo - _notebooks/vgg-qat - + tutorials/use_from_pytorch + tutorials/using_dla + tutorials/notebooks Python API Documenation ------------------------ @@ -69,6 +66,7 @@ Python API Documenation * :ref:`torch_tensorrt_logging_py` * :ref:`torch_tensorrt_ptq_py` * :ref:`torch_tensorrt_ts_py` +* :ref:`torch_tensorrt_fx_py` .. toctree:: :caption: Python API Documenation @@ -79,6 +77,7 @@ Python API Documenation py_api/logging py_api/ptq py_api/ts + py_api/fx C++ API Documenation ---------------------- @@ -99,6 +98,18 @@ C++ API Documenation _cpp_api/namespace_torch_tensorrt__torchscript _cpp_api/namespace_torch_tensorrt__ptq +CLI Documentation +--------------------- +* :ref:`torchtrtc` + +.. 
toctree:: + :caption: CLI Documenation + :maxdepth: 0 + :hidden: + + cli/torchtrtc + + Contributor Documentation -------------------------------- * :ref:`system_overview` diff --git a/docsrc/py_api/fx.rst b/docsrc/py_api/fx.rst new file mode 100644 index 0000000000..8ce591f5ee --- /dev/null +++ b/docsrc/py_api/fx.rst @@ -0,0 +1,31 @@ +.. _torch_tensorrt_fx_py: + +torch_tensorrt.fx +=================== + +.. currentmodule:: torch_tensorrt.fx + +.. automodule torch_tensorrt.ts + :undoc-members: + +.. automodule:: torch_tensorrt.fx + :members: + :undoc-members: + :show-inheritance: + +Functions +------------ + +.. autofunction:: compile + + +Classes +-------- + +.. autoclass:: TRTModule + +.. autoclass:: InputTensorSpec + +.. autoclass:: TRTInterpreter + +.. autoclass:: TRTInterpreterResult \ No newline at end of file diff --git a/docsrc/py_api/torch_tensorrt.rst b/docsrc/py_api/torch_tensorrt.rst index 03f6d8101b..2ccf7c60e2 100644 --- a/docsrc/py_api/torch_tensorrt.rst +++ b/docsrc/py_api/torch_tensorrt.rst @@ -57,3 +57,4 @@ Submodules logging ptq ts + fx diff --git a/docsrc/tutorials/getting_started_with_fx_path.rst b/docsrc/tutorials/getting_started_with_fx_path.rst index eb39fc0eef..d7dc9b0166 100644 --- a/docsrc/tutorials/getting_started_with_fx_path.rst +++ b/docsrc/tutorials/getting_started_with_fx_path.rst @@ -1,42 +1,29 @@ -.. user_guide: -Torch-TensorRT (FX Path) User Guide -======================== -Torch-TensorRT (FX Path) is a tool that can convert a PyTorch model through torch.FX to an TensorRT engine optimized targeting running on Nvidia GPUs. TensorRT is the inference engine developed by Nvidia which composed of various kinds of optimization including kernel fusion, graph optimization, low precision, etc.. -This tool is developed in Python environment providing most usability to researchers and engineers. There are a few stages that a user want to use this tool and we will introduce them here. - - -Installation ------------- -Torch-TensorRT (FX Path) is in ``Beta`` phase and always recommended to work with PyTorch nightly. +.. _getting_started_with_fx: +Torch-TensorRT (FX Frontend) User Guide +======================== +Torch-TensorRT (FX Frontend) is a tool that can convert a PyTorch model through ``torch.fx`` to an +TensorRT engine optimized targeting running on Nvidia GPUs. TensorRT is the inference engine +developed by NVIDIA which composed of various kinds of optimization including kernel fusion, +graph optimization, low precision, etc.. This tool is developed in Python environment which allows this +workflow to be very accessible to researchers and engineers. There are a few stages that a +user want to use this tool and we will introduce them here. -* Method 1. Follow the instrucions for Torch-TensorRT -* Method 2. To install FX path only (Python path) and avoid the C++ build for torchscript path +> Torch-TensorRT (FX Frontend) is in ``Beta`` and currently it is recommended to work with PyTorch nightly. .. code-block:: shell - $ conda create --name python_env python=3.8 - $ conda activate python_env - - # Recommend to install PyTorch 1.12 and later - $ conda install pytorch torchvision torchtext cudatoolkit=11.3 -c pytorch-nightly - - # Install TensorRT python package - $ pip3 install nvidia-pyindex - $ pip3 install nvidia-tensorrt==8.2.4.2 - $ git clone https://github.com/pytorch/TensorRT.git - $ cd TensorRT/py && python setup.py install --fx-only && cd .. 
-
-    $ pyton -c "import torch_tensorrt.fx"
 
     # Test an example by
     $ python py/torch_tensorrt/fx/example/lower_example.py
 
 Converting a PyTorch Model to TensorRT Engine
 ---------------------------------------------
-In general, users are welcome to use the ``compile()`` to finish the conversion from a model to tensorRT engine. It is a wrapper API that consists of the major steps needed to finish this converison. Please refer to ``lower_example.py`` file in ``examples/fx``.
+In general, users are welcome to use ``compile()`` to finish the conversion from a model to a TensorRT engine. It is a
+wrapper API that consists of the major steps needed to finish this conversion. Please refer to the ``lower_example.py`` file in ``examples/fx``.
 
-In this section, we will go through an example to illustrate the major steps that FX path uses. Users can refer to ``fx2trt_example.py`` file in ``examples/fx``.
+In this section, we will go through an example to illustrate the major steps that the FX path uses.
+Users can refer to the ``fx2trt_example.py`` file in ``examples/fx``.
 
 * **Step 1: Trace the model with acc_tracer** Acc_tracer is a tracer inheritated from FX tracer. It comes with args normalizer to convert all args to kwargs and pass to TRT converters.
@@ -276,7 +263,7 @@ In the custom mapper function, we construct an acc op node and return it. The no
 The last step would be *adding unit test* for the new acc op or mapper function we added. The place to add the unit test is here `test_acc_tracer.py `_.
 
-* **Step 2. Add a new fx2trt converter**
+* **Step 2. Add a new converter**
 All the developed converters for acc ops are all in `acc_op_converter.py `_. It could give you a good example of how the converter is added.
 
diff --git a/docsrc/tutorials/notebooks.rst b/docsrc/tutorials/notebooks.rst
new file mode 100644
index 0000000000..df903fc353
--- /dev/null
+++ b/docsrc/tutorials/notebooks.rst
@@ -0,0 +1,154 @@
+.. _notebooks:
+
+Example notebooks
+===================
+
+There are a number of notebooks which cover using specific features and models
+with Torch-TensorRT.
+
+Notebooks
+------------
+
+Compiling CitriNet with Torch-TensorRT
+********************************************
+
+CitriNet is an acoustic model used for the speech-to-text recognition task. It is a version
+of QuartzNet that extends ContextNet, utilizing subword encoding (via Word Piece tokenization)
+and a Squeeze-and-Excitation (SE) mechanism, and is therefore smaller than QuartzNet models. CitriNet
+models take in audio segments and transcribe them to letter, byte pair, or word piece sequences.
+
+This notebook demonstrates the steps for optimizing a pretrained CitriNet model with Torch-TensorRT,
+and running it to test the speedup obtained.
+
+* `Torch-TensorRT Getting Started - CitriNet `_
+
+
+Compiling EfficientNet with Torch-TensorRT
+********************************************
+
+EfficientNet is a feedforward CNN designed to achieve better performance and accuracy than alternative architectures
+by using a "scaling method that uniformly scales all dimensions of depth/width/resolution using a simple yet highly effective compound coefficient".
+
+This notebook demonstrates the steps for optimizing a pretrained EfficientNet model with Torch-TensorRT,
+and running it to test the speedup obtained.
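+
+As a taste of what the notebook covers, the sketch below shows the general shape of this
+workflow. It is a minimal illustration only: it assumes a CUDA-capable machine with
+``torchvision`` installed, and the batch size, input shape, and precision are placeholder
+choices rather than anything the notebook requires.
+
+.. code-block:: py
+
+    import torch
+    import torchvision
+    import torch_tensorrt
+
+    # Load a pretrained EfficientNet-B0 and switch to inference mode
+    model = torchvision.models.efficientnet_b0(pretrained=True).eval().to("cuda")
+
+    # Compile with the TorchScript frontend; shape and precision are illustrative
+    trt_model = torch_tensorrt.compile(
+        model,
+        inputs=[torch_tensorrt.Input((1, 3, 224, 224))],
+        enabled_precisions={torch.float},
+    )
+
+    # Run inference through the optimized module
+    result = trt_model(torch.randn((1, 3, 224, 224), device="cuda"))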
+
+* `Torch-TensorRT Getting Started - EfficientNet-B0 `_
+
+
+Masked Language Modeling (MLM) with Hugging Face BERT Transformer accelerated by Torch-TensorRT
+*************************************************************************************************
+
+"BERT is a transformer model pretrained on a large corpus of English data in a self-supervised fashion.
+This way, the model learns an inner representation of the English language that can then be used to extract
+features useful for downstream tasks: if you have a dataset of labeled sentences for instance, you can train
+a standard classifier using the features produced by the BERT model as inputs." (https://huggingface.co/bert-base-uncased)
+
+This notebook demonstrates the steps for optimizing a pretrained BERT model with Torch-TensorRT,
+and running it to test the speedup obtained.
+
+* `Masked Language Modeling (MLM) with Hugging Face BERT Transformer `_
+
+
+Serving a model in C++ using Torch-TensorRT
+**********************************************
+
+This example shows how you can load a pretrained ResNet-50 model, convert it to a Torch-TensorRT
+optimized model (via the Torch-TensorRT Python API), save the model as a TorchScript module, and
+then finally load and serve the model with the PyTorch C++ API.
+
+* `ResNet C++ Serving Example `_
+
+
+Compiling ResNet50 with Torch-TensorRT
+********************************************
+
+This notebook demonstrates the steps for compiling a TorchScript module with Torch-TensorRT on a
+pretrained ResNet-50 network, and running it to test the speedup obtained.
+
+* `Torch-TensorRT Getting Started - ResNet 50 `_
+
+
+Using Dynamic Shapes with Torch-TensorRT
+********************************************
+
+Making use of dynamically shaped tensors in Torch-TensorRT is quite simple. Let's say you are
+using the ``torch_tensorrt.compile(...)`` function to compile a TorchScript module. One
+of the args in this function is ``inputs``, which defines an input to a
+module in terms of expected shape, data type and tensor format: ``torch_tensorrt.Input``.
+
+For the purposes of this walkthrough we just need three kwargs: ``min_shape``, ``opt_shape`` and ``max_shape``.
+
+.. code-block:: py
+
+    torch_tensorrt.Input(
+        min_shape=(1, 224, 224, 3),
+        opt_shape=(1, 512, 512, 3),
+        max_shape=(1, 1024, 1024, 3),
+        dtype=torch.int32,
+        format=torch.channels_last,
+    )
+    ...
+
+In this example, we are going to use a simple ResNet model to demonstrate the use of the API.
+
+* `Torch-TensorRT - Using Dynamic Shapes `_
+
+Using the FX Frontend with Torch-TensorRT
+********************************************
+
+The purpose of this example is to demonstrate the overall flow of lowering a PyTorch model to TensorRT
+conveniently using FX.
+
+* `Using the FX Frontend with Torch-TensorRT `_
+
+
+Compiling a PyTorch model using FX Frontend with Torch-TensorRT
+*******************************************************************
+
+The purpose of this example is to demonstrate the overall flow of lowering a PyTorch
+model to TensorRT via FX with existing FX-based tooling.
+
+* `Compiling a PyTorch model using FX Frontend with Torch-TensorRT `_
+
+
+Compiling LeNet with Torch-TensorRT
+*******************************************************************
+
+This notebook demonstrates the steps for compiling a TorchScript module with Torch-TensorRT on a simple LeNet network.
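+
+The overall pattern the notebook follows (define the network, script it with
+``torch.jit.script``, then hand it to the TorchScript frontend) can be sketched as below.
+This is a minimal illustration with a stand-in LeNet-style definition, not the notebook's
+exact model:
+
+.. code-block:: py
+
+    import torch
+    import torch.nn as nn
+    import torch_tensorrt
+
+    # A stand-in LeNet-style network (the notebook uses its own definition)
+    class LeNet(nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.features = nn.Sequential(
+                nn.Conv2d(1, 6, 5), nn.ReLU(), nn.MaxPool2d(2),
+                nn.Conv2d(6, 16, 5), nn.ReLU(), nn.MaxPool2d(2),
+            )
+            self.classifier = nn.Sequential(
+                nn.Linear(16 * 5 * 5, 120), nn.ReLU(),
+                nn.Linear(120, 84), nn.ReLU(),
+                nn.Linear(84, 10),
+            )
+
+        def forward(self, x):
+            x = self.features(x)
+            x = torch.flatten(x, 1)
+            return self.classifier(x)
+
+    scripted = torch.jit.script(LeNet().eval().to("cuda"))
+
+    # Compile the scripted module; the input shape here is an assumption
+    trt_mod = torch_tensorrt.ts.compile(
+        scripted,
+        inputs=[torch_tensorrt.Input((1, 1, 32, 32))],
+    )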
+
+* `Torch-TensorRT Getting Started - LeNet `_
+
+
+Accelerate Deep Learning Models using Quantization in Torch-TensorRT
+*********************************************************************
+
+Model quantization is a popular optimization which reduces the size of models, thereby
+accelerating inference and also opening up the possibility of deployment on devices with lower
+compute power such as Jetson. Simply put, quantization is a process of mapping input values
+from a larger set to output values in a smaller set. In this notebook, we illustrate the workflow
+that you can adopt while quantizing a deep learning model in Torch-TensorRT. The notebook takes
+you through an example of MobileNetV2 for a classification task on a subset of the ImageNet dataset
+called Imagenette, which has 10 classes.
+
+* `Accelerate Deep Learning Models using Quantization in Torch-TensorRT `_
+
+
+Object Detection with Torch-TensorRT (SSD)
+*******************************************************************
+
+This notebook demonstrates the steps for compiling a TorchScript module with Torch-TensorRT on a pretrained SSD network, and running it to test the speedup obtained.
+
+* `Object Detection with Torch-TensorRT (SSD) `_
+
+
+Deploying Quantization Aware Trained models in INT8 using Torch-TensorRT
+*****************************************************************************
+
+Quantization Aware Training (QAT) simulates quantization during training by
+quantizing weights and activation layers. This will help to reduce the loss in
+accuracy when we convert the network trained in FP32 to INT8 for faster inference.
+QAT introduces additional nodes in the graph which will be used to learn the dynamic
+ranges of weights and activation layers. In this notebook, we illustrate the following
+steps from training to inference of a QAT model in Torch-TensorRT.
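+
+Deployment itself reduces to compiling the exported TorchScript module with INT8 kernels
+enabled. The sketch below is a minimal illustration; ``qat_model.jit.pt`` is a placeholder
+path for a QAT-trained model that has already been exported with TorchScript:
+
+.. code-block:: py
+
+    import torch
+    import torch_tensorrt
+
+    # Placeholder: a QAT-trained, TorchScript-exported model
+    qat_ts_model = torch.jit.load("qat_model.jit.pt").eval().to("cuda")
+
+    # The Q/DQ nodes inserted during QAT carry the learned dynamic ranges,
+    # so no separate calibrator is needed here; just enable INT8
+    trt_mod = torch_tensorrt.ts.compile(
+        qat_ts_model,
+        inputs=[torch_tensorrt.Input((1, 3, 224, 224))],
+        enabled_precisions={torch.int8},
+    )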
+ +* `Deploying Quantization Aware Trained models in INT8 using Torch-TensorRT `_ diff --git a/py/requirements.txt b/py/requirements.txt index 29bd0f92eb..5254a658a4 100644 --- a/py/requirements.txt +++ b/py/requirements.txt @@ -1,3 +1,6 @@ +numpy +pybind11==2.6.2 --extra-index-url https://download.pytorch.org/whl/cu113 torch==1.12.0 -pybind11==2.6.2 +--extra-index-url https://pypi.ngc.nvidia.com +nvidia-tensorrt==8.4.1.5 diff --git a/py/torch_tensorrt/fx/__init__.py b/py/torch_tensorrt/fx/__init__.py index c1c42c446f..493d749d09 100644 --- a/py/torch_tensorrt/fx/__init__.py +++ b/py/torch_tensorrt/fx/__init__.py @@ -11,5 +11,6 @@ from .input_tensor_spec import generate_input_specs, InputTensorSpec # noqa from .lower_setting import LowerSetting # noqa from .trt_module import TRTModule # noqa +from .lower import compile logging.basicConfig(level=logging.INFO) From 12f39acce5f4a15dd3048f54d5943e4eccbfac1c Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Fri, 19 Aug 2022 14:07:02 -0700 Subject: [PATCH 16/31] chore: apply linting Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- core/compiler.cpp | 3 ++- tests/core/conversion/converters/test_select.cpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/core/compiler.cpp b/core/compiler.cpp index 898f2f1295..bf128b714a 100644 --- a/core/compiler.cpp +++ b/core/compiler.cpp @@ -426,7 +426,8 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg) auto outputIsCollection = conversion::OutputIsCollection(g->block()); if (cfg.partition_info.enabled && (cfg.lower_info.forced_fallback_modules.size() == 0 && - cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible) && !outputIsCollection) { + cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible) && + !outputIsCollection) { LOG_INFO("Skipping partitioning since model is fully supported"); } diff --git a/tests/core/conversion/converters/test_select.cpp b/tests/core/conversion/converters/test_select.cpp index d77fa37d40..e9405c0155 100644 --- a/tests/core/conversion/converters/test_select.cpp +++ b/tests/core/conversion/converters/test_select.cpp @@ -865,7 +865,7 @@ TEST(Converters, ScatterValueConvertsCorrectly) { %5 : NoneType = prim::Constant() %6 : bool = prim::Constant[value=0]() %7 : int = prim::Constant[value=4]() - %index : Tensor = aten::to(%index.1, %7, %6, %6, %5) + %index : Tensor = aten::to(%index.1, %7, %6, %6, %5) %10 : Tensor = aten::scatter(%data, %dim, %index, %value) return (%10))IR"; @@ -900,7 +900,7 @@ TEST(Converters, ScatterSrcConvertsCorrectly) { %5 : NoneType = prim::Constant() %6 : bool = prim::Constant[value=0]() %7 : int = prim::Constant[value=4]() - %index : Tensor = aten::to(%index.1, %7, %6, %6, %5) + %index : Tensor = aten::to(%index.1, %7, %6, %6, %5) %10 : Tensor = aten::scatter(%data, %dim, %index, %src) return (%10))IR"; From cfd816f04eecadd81ec62ba6051672cef5b111d2 Mon Sep 17 00:00:00 2001 From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Fri, 19 Aug 2022 14:18:38 -0700 Subject: [PATCH 17/31] Update docgen.yml --- .github/workflows/docgen.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index 147625e51d..d967248e11 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -12,7 +12,7 @@ jobs: build-docs: runs-on: ubuntu-20.04 container: - image: nvidia/cuda:11.3.1-devel-ubuntu20.04 + image: ghcr.io/pytorch/tensorrt/docgen:latest credentials: 
username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} @@ -88,4 +88,4 @@ jobs: file_pattern: docs/ commit_user_name: Torch-TensorRT Github Bot commit_user_email: torch-tensorrt.github.bot@nvidia.com - commit_author: Torch-TensorRT Github Bot \ No newline at end of file + commit_author: Torch-TensorRT Github Bot From 5d8b2a73e3f53a2b8ec8c6739812c622c6a59a28 Mon Sep 17 00:00:00 2001 From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Fri, 19 Aug 2022 14:23:52 -0700 Subject: [PATCH 18/31] Update docgen.yml --- .github/workflows/docgen.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index d967248e11..9d353eb5aa 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -23,10 +23,12 @@ jobs: rm -rf /opt/ghc rm -rf "/usr/local/share/boost" rm -rf /usr/local/cuda/cuda-* - - name: Install Python - run: + - name: Install base deps + run: | apt update apt install -y gcc git curl wget make zlib1g-dev bzip2 libbz2-dev lzma lzma-dev libreadline-dev libsqlite3-dev libssl-dev libffi-dev doxygen pandoc + - name: Install Python + run: | mkdir -p /opt/circleci git clone https://github.com/pyenv/pyenv.git /opt/circleci/.pyenv export PYENV_ROOT="/opt/circleci/.pyenv" From 7215a67d0c5e4f117e36a749da9f13bafe4152b7 Mon Sep 17 00:00:00 2001 From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Fri, 19 Aug 2022 14:29:47 -0700 Subject: [PATCH 19/31] Update docgen.yml --- .github/workflows/docgen.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index 9d353eb5aa..0b376f7461 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -26,7 +26,7 @@ jobs: - name: Install base deps run: | apt update - apt install -y gcc git curl wget make zlib1g-dev bzip2 libbz2-dev lzma lzma-dev libreadline-dev libsqlite3-dev libssl-dev libffi-dev doxygen pandoc + apt install -y software-properties-common gcc git curl wget make zlib1g-dev bzip2 libbz2-dev lzma lzma-dev libreadline-dev libsqlite3-dev libssl-dev libffi-dev doxygen pandoc - name: Install Python run: | mkdir -p /opt/circleci @@ -52,6 +52,7 @@ jobs: chmod a+x /usr/bin/bazel - name: Install cudnn + tensorrt run: | + apt-get update wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub From 69ba6f8ff2cb68b5af962d9944bef3ebb902cfda Mon Sep 17 00:00:00 2001 From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Fri, 19 Aug 2022 14:34:31 -0700 Subject: [PATCH 20/31] Update docgen.yml --- .github/workflows/docgen.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index 0b376f7461..cc9793f0c1 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -26,7 +26,7 @@ jobs: - name: Install base deps run: | apt update - apt install -y software-properties-common gcc git curl wget make zlib1g-dev bzip2 libbz2-dev lzma lzma-dev libreadline-dev libsqlite3-dev libssl-dev libffi-dev doxygen pandoc + DEBIAN_FRONTEND=noninteractive apt install -y software-properties-common gcc git curl wget make zlib1g-dev bzip2 libbz2-dev lzma lzma-dev libreadline-dev libsqlite3-dev libssl-dev libffi-dev doxygen 
pandoc - name: Install Python run: | mkdir -p /opt/circleci From 110d425089c88cf10f299bef52b9bd962253175d Mon Sep 17 00:00:00 2001 From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Fri, 19 Aug 2022 14:52:12 -0700 Subject: [PATCH 21/31] Update docgen.yml --- .github/workflows/docgen.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index cc9793f0c1..21872ade60 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -69,16 +69,25 @@ jobs: apt-get install -y libnvinfer8 libnvinfer-plugin8 libnvinfer-dev libnvinfer-plugin-dev - name: Install Torch run: | + export PYENV_ROOT="/opt/circleci/.pyenv" + export PATH="$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH" + pyenv global 3.9.4 python3 -m pip install -r py/requirements.txt - name: Build Python Package run: | cp toolchains/ci_workspaces/WORKSPACE.x86_64 WORKSPACE cd py - pip install -e . + export PYENV_ROOT="/opt/circleci/.pyenv" + export PATH="$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH" + pyenv global 3.9.4 + python3 -m pip install -e . cd .. - name: Generate New Docs run: | cd docsrc + export PYENV_ROOT="/opt/circleci/.pyenv" + export PATH="$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH" + pyenv global 3.9.4 python3 -m pip install -r requirements.txt python3 -c "import torch_tensorrt; print(torch_tensorrt.__version__)" make html From 7836ce233816a9fd3b6dfae920f0f2c27389bb4f Mon Sep 17 00:00:00 2001 From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Fri, 19 Aug 2022 15:18:55 -0700 Subject: [PATCH 22/31] Update docgen.yml --- .github/workflows/docgen.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index 21872ade60..b2a9d77290 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -80,7 +80,7 @@ jobs: export PYENV_ROOT="/opt/circleci/.pyenv" export PATH="$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH" pyenv global 3.9.4 - python3 -m pip install -e . + python3 setup.py install cd .. 
- name: Generate New Docs run: | From 10325f14a58fa495a937fbb7f4ac491e4c4354c6 Mon Sep 17 00:00:00 2001 From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Fri, 19 Aug 2022 15:31:18 -0700 Subject: [PATCH 23/31] Update docgen.yml --- .github/workflows/docgen.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index b2a9d77290..8a3fd24717 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -27,6 +27,7 @@ jobs: run: | apt update DEBIAN_FRONTEND=noninteractive apt install -y software-properties-common gcc git curl wget make zlib1g-dev bzip2 libbz2-dev lzma lzma-dev libreadline-dev libsqlite3-dev libssl-dev libffi-dev doxygen pandoc + git config --global --add safe.directory '*' - name: Install Python run: | mkdir -p /opt/circleci From d7fd6916444fd77222819f9154dea6386db5b4ed Mon Sep 17 00:00:00 2001 From: Wei Date: Mon, 22 Aug 2022 10:52:43 -0700 Subject: [PATCH 24/31] update fx notebook (#1297) --- ...ng_started_with_fx_path_lower_to_trt.ipynb | 183 +++++++++++++----- 1 file changed, 137 insertions(+), 46 deletions(-) diff --git a/notebooks/getting_started_with_fx_path_lower_to_trt.ipynb b/notebooks/getting_started_with_fx_path_lower_to_trt.ipynb index 5ef957fa36..8e480903ab 100644 --- a/notebooks/getting_started_with_fx_path_lower_to_trt.ipynb +++ b/notebooks/getting_started_with_fx_path_lower_to_trt.ipynb @@ -10,14 +10,14 @@ "bento/extensions/theme/main.css": true }, "kernelspec": { - "display_name": "accelerators", + "display_name": "dper3_pytorch (cinder)", "language": "python", - "name": "bento_kernel_accelerators", + "name": "bento_kernel_dper3_pytorch_cinder", "metadata": { - "kernel_name": "bento_kernel_accelerators", - "nightly_builds": true, + "kernel_name": "bento_kernel_dper3_pytorch_cinder", + "nightly_builds": false, "fbpkg_supported": true, - "cinder_runtime": false, + "cinder_runtime": true, "is_prebuilt": true } }, @@ -32,10 +32,10 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3" }, - "last_server_session_id": "c6f6ab3c-9274-41e7-8592-b1b583442e00", - "last_kernel_id": "fcbf3a69-76a4-4730-9b41-bcd0b24729ca", - "last_base_url": "https://devgpu005.ftw6.facebook.com:8093/", - "last_msg_id": "e28f842c-f32dde25c1b80ef7d423dfee_407", + "last_server_session_id": "24a1a10c-29aa-4e2b-a11f-2b5108fc1e58", + "last_kernel_id": "5f014373-151c-4ee8-8939-4daab994d202", + "last_base_url": "https://devgpu005.ftw6.facebook.com:8091/", + "last_msg_id": "687e81e8-4414f32c89cd026dd1ea3fd9_139", "outputWidgetContext": {} }, "nbformat": 4, @@ -58,14 +58,14 @@ { "cell_type": "code", "metadata": { - "originalKey": "7909785f-b9b4-41dd-82af-c144b879df39", + "originalKey": "7db2accc-9fa4-4a1e-8142-d887f2947bcd", "showInput": true, "customInput": null, "collapsed": false, - "requestMsgId": "7db2accc-9fa4-4a1e-8142-d887f2947bcd", + "requestMsgId": "b5d8efce-0963-4074-bc9d-e8e1a78fd424", "customOutput": null, - "executionStartTime": 1656395936225, - "executionStopTime": 1656395937851 + "executionStartTime": 1661189891682, + "executionStopTime": 1661189891856 }, "source": [ "import typing as t\n", @@ -74,10 +74,10 @@ "\n", "import torch\n", "import torchvision\n", - "from torch_tensorrt.fx.lower import lower_to_trt\n", + "from torch_tensorrt.fx.lower import compile\n", "from torch_tensorrt.fx.utils import LowerPrecision" ], - "execution_count": 4, + "execution_count": 9, "outputs": [] }, { @@ -98,16 +98,16 @@ { "cell_type": "code", "metadata": { - "originalKey": 
"a4455135-8633-4d2d-bdd3-6435a4a9f4dd", + "originalKey": "2835fffa-cc50-479a-9080-c4f7002c0726", "showInput": true, "customInput": null, "code_folding": [], "hidden_ranges": [], "collapsed": false, - "requestMsgId": "2835fffa-cc50-479a-9080-c4f7002c0726", + "requestMsgId": "6ea72dbf-dbfe-451e-8613-15f87e34a1a5", "customOutput": null, - "executionStartTime": 1656398717455, - "executionStopTime": 1656398717662 + "executionStartTime": 1661189260550, + "executionStopTime": 1661189262039 }, "source": [ "@dataclass\n", @@ -159,24 +159,39 @@ " f\"Accuracy: {self.accuracy_res} (rtol={self.conf.accuracy_rtol})\"\n", " )" ], - "execution_count": 22, - "outputs": [] + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "I0822 102740.872 _utils_internal.py:179] NCCL_DEBUG env var is set to None\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "I0822 102740.873 _utils_internal.py:188] NCCL_DEBUG is INFO from /etc/nccl.conf\n" + ] + } + ] }, { "cell_type": "markdown", "metadata": { "originalKey": "3e462cf6-d282-402d-955b-a3ecb400bf0b", - "showInput": true, + "showInput": false, "customInput": null, "code_folding": [], "hidden_ranges": [] }, "source": [ "Run FX path lowering and benchmark the given model according to the specified benchmark configuration. Prints the benchmark result for each configuration at the end of the run. `benchmark_torch_function` is the actual function that computes the fixed number of iterations of functions runs.\n", - "The FX path lowering and TensorRT engine creation is integrated into `low_to_trt()` API which is defined in `fx/lower.py` file.\n", + "The FX path lowering and TensorRT engine creation is integrated into `compile()` API which is defined in `fx/lower.py` file.\n", "It is good to list it out and show the usage of it. 
It takes in original module, input and lowering setting, run lowering workflow to turn module into a executable TRT engine \n", "```\n", - "def lower_to_trt(\n", + "def compile(\n", " module: nn.Module,\n", " input: ,\n", " max_batch_size: int = 2048,\n", @@ -212,22 +227,18 @@ { "cell_type": "code", "metadata": { - "originalKey": "91333212-7f6d-4bde-a248-44d485e83e5e", + "originalKey": "3002935b-b95a-4a08-a57f-f7a35485af5b", "showInput": true, "customInput": null, "code_folding": [], "hidden_ranges": [], "collapsed": false, - "requestMsgId": "3002935b-b95a-4a08-a57f-f7a35485af5b", + "requestMsgId": "dc73f2d0-427b-4f71-bec1-b118cc5642d0", "customOutput": null, - "executionStartTime": 1656397903207, - "executionStopTime": 1656397964752 + "executionStartTime": 1661189697773, + "executionStopTime": 1661189753875 }, "source": [ - "test_model = torchvision.models.resnet18(pretrained=True)\n", - "input = [torch.rand(128, 3, 224, 224)] \n", - "benchmark(test_model, input, 50, 128)\n", - "\n", "def benchmark_torch_function(iters: int, f, *args) -> float:\n", " \"\"\"Estimates the average time duration for a single inference call in second\n", "\n", @@ -266,7 +277,7 @@ " time = benchmark_torch_function(conf.batch_iter, lambda: module(*input))\n", " elif not conf.jit:\n", " # Run lowering eager mode benchmark\n", - " lowered_module = lower_to_trt(\n", + " lowered_module = compile(\n", " module,\n", " input,\n", " max_batch_size=conf.batch_size,\n", @@ -279,6 +290,7 @@ " result = Result(module=module, input=input, conf=conf, time_sec=time)\n", " return result\n", "\n", + "\n", "@torch.inference_mode()\n", "def benchmark(\n", " model,\n", @@ -315,16 +327,25 @@ " ),\n", " ]\n", "\n", - " results = [\n", - " run_configuration_benchmark(deepcopy(model), inputs, conf_)\n", - " for conf_ in configurations\n", - " ]\n", + " results = [run_configuration_benchmark(deepcopy(model), inputs, conf_) for conf_ in configurations]\n", "\n", " for res in results:\n", - " print(res.format())" + " print(res.format())\n", + "\n", + "\n", + "test_model = torchvision.models.resnet18(pretrained=True)\n", + "input = [torch.rand(128, 3, 224, 224)]\n", + "benchmark(test_model, input, 50, 128)" ], - "execution_count": 21, + "execution_count": 8, "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "I0822 103458.189 manifold.py:1435] URL manifold://torchvision/tree/models/resnet18-f37072fd.pth was already cached in /home/wwei6/.torch/iopath_cache/manifold_cache/tree/models/resnet18-f37072fd.pth\n" + ] + }, { "output_type": "stream", "name": "stdout", @@ -339,25 +360,60 @@ "== End benchmark iterations\n=== Running benchmark for: Configuration(batch_iter=50, batch_size=128, name='TRT FP32 Eager', trt=True, jit=False, fp16=False, accuracy_rtol=0.001) green\n" ] }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "I0822 103501.297 pass_utils.py:166] == Log pass before/after graph to /tmp/tmpe_7p37fq\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "I0822 103501.390 pass_utils.py:166] == Log pass before/after graph to /tmp/tmpg_a347f0\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "I0822 103501.509 lower_pass_manager_builder.py:151] Now lowering submodule _run_on_acc_0\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "I0822 103501.511 lower.py:89] split_name='_run_on_acc_0' self.lower_setting.input_specs=[InputTensorSpec(shape=torch.Size([128, 3, 224, 224]), dtype=torch.float32, device=device(type='cuda', 
index=0), shape_ranges=[], has_batch_dim=True)]\n" + ] + }, { "output_type": "stream", "name": "stdout", "text": [ - "== Log pass before/after graph to /tmp/tmpaayayg72\n== Log pass before/after graph to /tmp/tmpdw_pq71j\n\nSupported node types in the model:\nacc_ops.conv2d: ((), {'input': torch.float32, 'weight': torch.float32})\nacc_ops.batch_norm: ((), {'input': torch.float32, 'running_mean': torch.float32, 'running_var': torch.float32, 'weight': torch.float32, 'bias': torch.float32})\nacc_ops.relu: ((), {'input': torch.float32})\nacc_ops.max_pool2d: ((), {'input': torch.float32})\nacc_ops.add: ((), {'input': torch.float32, 'other': torch.float32})\nacc_ops.adaptive_avg_pool2d: ((), {'input': torch.float32})\nacc_ops.flatten: ((), {'input': torch.float32})\nacc_ops.linear: ((), {'input': torch.float32, 'weight': torch.float32, 'bias': torch.float32})\n\nUnsupported node types in the model:\n\nGot 1 acc subgraphs and 0 non-acc subgraphs\n" + "\nSupported node types in the model:\nacc_ops.conv2d: ((), {'input': torch.float32, 'weight': torch.float32})\nacc_ops.batch_norm: ((), {'input': torch.float32, 'running_mean': torch.float32, 'running_var': torch.float32, 'weight': torch.float32, 'bias': torch.float32})\nacc_ops.relu: ((), {'input': torch.float32})\nacc_ops.max_pool2d: ((), {'input': torch.float32})\nacc_ops.add: ((), {'input': torch.float32, 'other': torch.float32})\nacc_ops.adaptive_avg_pool2d: ((), {'input': torch.float32})\nacc_ops.flatten: ((), {'input': torch.float32})\nacc_ops.linear: ((), {'input': torch.float32, 'weight': torch.float32, 'bias': torch.float32})\n\nUnsupported node types in the model:\n\nGot 1 acc subgraphs and 0 non-acc subgraphs\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ - "I0627 233146.650 fx2trt.py:190] Run Module elapsed time: 0:00:00.244369\n" + "I0822 103503.964 fx2trt.py:204] Run Module elapsed time: 0:00:00.435984\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ - "I0627 233206.570 fx2trt.py:241] Build TRT engine elapsed time: 0:00:19.918630\n" + "I0822 103520.647 fx2trt.py:258] Build TRT engine elapsed time: 0:00:16.681226\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "I0822 103520.658 lower_pass_manager_builder.py:168] Lowering submodule _run_on_acc_0 elapsed time 0:00:19.147071\n" ] }, { @@ -374,25 +430,60 @@ "== End benchmark iterations\n=== Running benchmark for: Configuration(batch_iter=50, batch_size=128, name='TRT FP16 Eager', trt=True, jit=False, fp16=True, accuracy_rtol=0.01) green\n" ] }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "I0822 103523.067 pass_utils.py:166] == Log pass before/after graph to /tmp/tmpgphlicna\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "I0822 103523.106 pass_utils.py:166] == Log pass before/after graph to /tmp/tmpy9cumddi\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "I0822 103523.173 lower_pass_manager_builder.py:151] Now lowering submodule _run_on_acc_0\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "I0822 103523.174 lower.py:89] split_name='_run_on_acc_0' self.lower_setting.input_specs=[InputTensorSpec(shape=torch.Size([128, 3, 224, 224]), dtype=torch.float16, device=device(type='cuda', index=0), shape_ranges=[], has_batch_dim=True)]\n" + ] + }, { "output_type": "stream", "name": "stdout", "text": [ - "== Log pass before/after graph to /tmp/tmpnoeblgd5\n== Log pass before/after graph to /tmp/tmpyb1egsof\n\nSupported node types in the 
model:\nacc_ops.conv2d: ((), {'input': torch.float16, 'weight': torch.float16})\nacc_ops.batch_norm: ((), {'input': torch.float16, 'running_mean': torch.float16, 'running_var': torch.float16, 'weight': torch.float16, 'bias': torch.float16})\nacc_ops.relu: ((), {'input': torch.float16})\nacc_ops.max_pool2d: ((), {'input': torch.float16})\nacc_ops.add: ((), {'input': torch.float16, 'other': torch.float16})\nacc_ops.adaptive_avg_pool2d: ((), {'input': torch.float16})\nacc_ops.flatten: ((), {'input': torch.float16})\nacc_ops.linear: ((), {'input': torch.float16, 'weight': torch.float16, 'bias': torch.float16})\n\nUnsupported node types in the model:\n\nGot 1 acc subgraphs and 0 non-acc subgraphs\n" + "\nSupported node types in the model:\nacc_ops.conv2d: ((), {'input': torch.float16, 'weight': torch.float16})\nacc_ops.batch_norm: ((), {'input': torch.float16, 'running_mean': torch.float16, 'running_var': torch.float16, 'weight': torch.float16, 'bias': torch.float16})\nacc_ops.relu: ((), {'input': torch.float16})\nacc_ops.max_pool2d: ((), {'input': torch.float16})\nacc_ops.add: ((), {'input': torch.float16, 'other': torch.float16})\nacc_ops.adaptive_avg_pool2d: ((), {'input': torch.float16})\nacc_ops.flatten: ((), {'input': torch.float16})\nacc_ops.linear: ((), {'input': torch.float16, 'weight': torch.float16, 'bias': torch.float16})\n\nUnsupported node types in the model:\n\nGot 1 acc subgraphs and 0 non-acc subgraphs\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "I0822 103523.466 fx2trt.py:204] Run Module elapsed time: 0:00:00.288043\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ - "I0627 233208.996 fx2trt.py:190] Run Module elapsed time: 0:00:00.217076\n" + "I0822 103553.687 fx2trt.py:258] Build TRT engine elapsed time: 0:00:30.220316\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ - "I0627 233244.147 fx2trt.py:241] Build TRT engine elapsed time: 0:00:35.150950\n" + "I0822 103553.698 lower_pass_manager_builder.py:168] Lowering submodule _run_on_acc_0 elapsed time 0:00:30.523791\n" ] }, { @@ -406,7 +497,7 @@ "output_type": "stream", "name": "stdout", "text": [ - "== End benchmark iterations\n== Benchmark Result for: Configuration(batch_iter=50, batch_size=128, name='CUDA Eager', trt=False, jit=False, fp16=False, accuracy_rtol=-1)\nBS: 128, Time per iter: 15.00ms, QPS: 8530.72, Accuracy: None (rtol=-1)\n== Benchmark Result for: Configuration(batch_iter=50, batch_size=128, name='TRT FP32 Eager', trt=True, jit=False, fp16=False, accuracy_rtol=0.001)\nBS: 128, Time per iter: 7.95ms, QPS: 16098.45, Accuracy: None (rtol=0.001)\n== Benchmark Result for: Configuration(batch_iter=50, batch_size=128, name='TRT FP16 Eager', trt=True, jit=False, fp16=True, accuracy_rtol=0.01)\nBS: 128, Time per iter: 4.36ms, QPS: 29365.31, Accuracy: None (rtol=0.01)\n" + "== End benchmark iterations\n== Benchmark Result for: Configuration(batch_iter=50, batch_size=128, name='CUDA Eager', trt=False, jit=False, fp16=False, accuracy_rtol=-1)\nBS: 128, Time per iter: 14.66ms, QPS: 8732.53, Accuracy: None (rtol=-1)\n== Benchmark Result for: Configuration(batch_iter=50, batch_size=128, name='TRT FP32 Eager', trt=True, jit=False, fp16=False, accuracy_rtol=0.001)\nBS: 128, Time per iter: 7.27ms, QPS: 17595.70, Accuracy: None (rtol=0.001)\n== Benchmark Result for: Configuration(batch_iter=50, batch_size=128, name='TRT FP16 Eager', trt=True, jit=False, fp16=True, accuracy_rtol=0.01)\nBS: 128, Time per iter: 4.49ms, QPS: 28480.34, Accuracy: None (rtol=0.01)\n" ] } ] From 
6e467f23d9007f151d0290ab00fba0eb117ee67b Mon Sep 17 00:00:01 2001
From: Wei 
Date: Mon, 22 Aug 2022 14:59:14 -0700
Subject: [PATCH 25/31] Changes done internally at Facebook (#1299)

bd46e8f292bf68fe6b87d2d5d206c89fda79a746 Shirong Wu Disable group ln fuse pass
6ce1d3bc19d75b266e99355c96daeff7054dcbf8 Wei Wei [fx2trt] set logging level to INFO at fx root
9d552dc3f69db9e4a249f80ef00803a9413e5d38 Wei Wei [fx2trt] change OSS method lower_to_trt() to compile()
6c4bdb8ac5823d161e4afc7c9d295f961aeeb0bf Mor Tzur fix engine holder test binary to fix contbuild_pytorch_fx2trt_build
636d0ab2a3d0f09267e25b8b8e7eedd4d91d791d Yinghai Lu [easy] remove random prints
5a97668307c26e69a89a4e02a535e319eaf3ce3d Wei Wei [ads] sequential linear fuse
508338ab343e407ee49605919508210b62ad9a52 Wei Wei [fx2trt] minor literal fix
---
 py/torch_tensorrt/fx/__init__.py          |  1 -
 py/torch_tensorrt/fx/lower.py             |  2 +-
 py/torch_tensorrt/fx/passes/pass_utils.py | 47 +++++++++++++++++++
 .../fx/tracer/acc_tracer/acc_ops.py        |  4 +-
 4 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/py/torch_tensorrt/fx/__init__.py b/py/torch_tensorrt/fx/__init__.py
index 493d749d09..c1c42c446f 100644
--- a/py/torch_tensorrt/fx/__init__.py
+++ b/py/torch_tensorrt/fx/__init__.py
@@ -11,6 +11,5 @@
 from .input_tensor_spec import generate_input_specs, InputTensorSpec  # noqa
 from .lower_setting import LowerSetting  # noqa
 from .trt_module import TRTModule  # noqa
-from .lower import compile
 
 logging.basicConfig(level=logging.INFO)
diff --git a/py/torch_tensorrt/fx/lower.py b/py/torch_tensorrt/fx/lower.py
index deeee14178..59b59d580f 100644
--- a/py/torch_tensorrt/fx/lower.py
+++ b/py/torch_tensorrt/fx/lower.py
@@ -53,7 +53,7 @@ def compile(
         timing_cache_prefix: Timing cache file name for timing cache used by fx2trt.
         save_timing_cache: Update timing cache with current timing cache data if set to True.
         cuda_graph_batch_size: Cuda graph batch size, default to be -1.
-
+        dynamic_batch: batch dimension (dim=0) is dynamic.
     Returns:
         A torch.nn.Module lowered by TensorRT.
     """
diff --git a/py/torch_tensorrt/fx/passes/pass_utils.py b/py/torch_tensorrt/fx/passes/pass_utils.py
index d430a67408..3fb88e04a9 100644
--- a/py/torch_tensorrt/fx/passes/pass_utils.py
+++ b/py/torch_tensorrt/fx/passes/pass_utils.py
@@ -102,6 +102,53 @@ def bounded_method(*args, **kwargs):
     return dec_for_method
 
 
+def log_perf_before_after(pass_: PassFunc) -> PassFunc:
+    """
+    Wraps a pass function to log perf of the module before and after the pass
+    """
+
+    @wraps(pass_)
+    def check_perf_with_before_after_log(
+        module: fx.GraphModule, input: Input
+    ) -> fx.GraphModule:
+        def benchmark_torch_function(iters: int, f, *args) -> float:
+            """Estimates the average time duration for a single inference call in seconds
+
+            If the input is batched, then the estimation is for the batched inference call.
+
+            Args:
+                iters: number of inference iterations to run
+                f: a function to perform a single inference call
+
+            Returns:
+                estimated average time duration in seconds for a single inference call
+            """
+            with torch.inference_mode():
+                f(*args)
+            torch.cuda.synchronize()
+            start_event = torch.cuda.Event(enable_timing=True)
+            end_event = torch.cuda.Event(enable_timing=True)
+            # print("== Start benchmark iterations")
+            with torch.inference_mode():
+                start_event.record()
+                for _ in range(iters):
+                    f(*args)
+                end_event.record()
+            torch.cuda.synchronize()
+            # print("== End benchmark iterations")
+            return (start_event.elapsed_time(end_event) * 1.0e-3) / iters
+
+        time_before = benchmark_torch_function(100, lambda: module(*input))
+        _LOGGER.info(f"[{pass_}] Perf Before(eager mode): {time_before}")
+
+        module = pass_(module, input)
+        time_after = benchmark_torch_function(100, lambda: module(*input))
+        _LOGGER.info(f"[{pass_}] Perf After(eager mode): {time_after}")
+        return module
+
+    return check_perf_with_before_after_log
+
+
 def log_before_after(pass_: PassFunc) -> PassFunc:
     """
     Wraps a pass function to log the module graph before and after the pass
diff --git a/py/torch_tensorrt/fx/tracer/acc_tracer/acc_ops.py b/py/torch_tensorrt/fx/tracer/acc_tracer/acc_ops.py
index d1a5322316..ccd572b9aa 100644
--- a/py/torch_tensorrt/fx/tracer/acc_tracer/acc_ops.py
+++ b/py/torch_tensorrt/fx/tracer/acc_tracer/acc_ops.py
@@ -778,7 +778,9 @@ def dropout_mapper(node: torch.fx.Node, mod: nn.Module):
     assert callable(stochastic_depth)
 except Exception as e:
-    warnings.warn(f"Unable to import torchvision related libraries.: {e}")
+    warnings.warn(
+        f"Unable to import torchvision related libraries: {e}. Please install the torchvision library in order to lower stochastic_depth"
+    )
 else:
 
 @register_custom_acc_mapper_fn(
From fbdffdcf8789efd7d9f62757be84a83b4a7896b4 Mon Sep 17 00:00:01 2001
From: Naren Dasan <1790613+narendasan@users.noreply.github.com>
Date: Mon, 22 Aug 2022 15:00:27 -0700
Subject: [PATCH 26/31] Update docgen.yml

---
 .github/workflows/docgen.yml | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml
index 8a3fd24717..2a4310d622 100644
--- a/.github/workflows/docgen.yml
+++ b/.github/workflows/docgen.yml
@@ -28,16 +28,10 @@ jobs:
         apt update
         DEBIAN_FRONTEND=noninteractive apt install -y software-properties-common gcc git curl wget make zlib1g-dev bzip2 libbz2-dev lzma lzma-dev libreadline-dev libsqlite3-dev libssl-dev libffi-dev doxygen pandoc
         git config --global --add safe.directory '*'
-      - name: Install Python
-        run: |
-          mkdir -p /opt/circleci
-          git clone https://github.com/pyenv/pyenv.git /opt/circleci/.pyenv
-          export PYENV_ROOT="/opt/circleci/.pyenv"
-          export PATH="$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH"
-          pyenv install 3.9.4
-          pyenv global 3.9.4
-          python3 -m pip install --upgrade pip
-          python3 -m pip install wheel
+      - name: Set up Python 3.9.4
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9.4
       - uses: actions/checkout@v2
         with:
           ref: ${{github.head_ref}}
@@ -70,25 +64,19 @@ jobs:
           apt-get install -y libnvinfer8 libnvinfer-plugin8 libnvinfer-dev libnvinfer-plugin-dev
       - name: Install Torch
         run: |
-          export PYENV_ROOT="/opt/circleci/.pyenv"
-          export PATH="$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH"
-          pyenv global 3.9.4
           python3 -m pip install -r py/requirements.txt
       - name: Build Python Package
         run: |
           cp toolchains/ci_workspaces/WORKSPACE.x86_64 WORKSPACE
           cd py
-          export PYENV_ROOT="/opt/circleci/.pyenv"
-
export PATH="$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH" - pyenv global 3.9.4 + echo $(which python3) + mkdir -p /opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/ + ln -s /usr/local/lib/python3.8/site-packages /opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages python3 setup.py install cd .. - name: Generate New Docs run: | cd docsrc - export PYENV_ROOT="/opt/circleci/.pyenv" - export PATH="$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH" - pyenv global 3.9.4 python3 -m pip install -r requirements.txt python3 -c "import torch_tensorrt; print(torch_tensorrt.__version__)" make html From 399c763f74533bbccfca3a7861817783c1f626ba Mon Sep 17 00:00:00 2001 From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Mon, 22 Aug 2022 15:52:31 -0700 Subject: [PATCH 27/31] Update docgen.yml --- .github/workflows/docgen.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index 2a4310d622..28d0752328 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -70,8 +70,9 @@ jobs: cp toolchains/ci_workspaces/WORKSPACE.x86_64 WORKSPACE cd py echo $(which python3) + ls $(which python3)/.. mkdir -p /opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/ - ln -s /usr/local/lib/python3.8/site-packages /opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages + ln -s /__t/Python/3.9.4/x64/lib/python3.9/site-packages /opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages python3 setup.py install cd .. - name: Generate New Docs From d2a81436d8490135cd022776fca999c4f8a126b8 Mon Sep 17 00:00:00 2001 From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Mon, 22 Aug 2022 16:01:58 -0700 Subject: [PATCH 28/31] Update docgen.yml --- .github/workflows/docgen.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index 28d0752328..79b9c74124 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -70,9 +70,9 @@ jobs: cp toolchains/ci_workspaces/WORKSPACE.x86_64 WORKSPACE cd py echo $(which python3) - ls $(which python3)/.. + echo $(python3 -c 'import site; print(site.getsitepackages())') mkdir -p /opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/ - ln -s /__t/Python/3.9.4/x64/lib/python3.9/site-packages /opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages + ln -s $(python3 -c 'import site; print(site.getsitepackages())') /opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages python3 setup.py install cd .. 
- name: Generate New Docs From 0baf50abb128de50afe827b783ae1b542d161dff Mon Sep 17 00:00:00 2001 From: Naren Dasan <1790613+narendasan@users.noreply.github.com> Date: Mon, 22 Aug 2022 16:11:07 -0700 Subject: [PATCH 29/31] Update docgen.yml --- .github/workflows/docgen.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index 79b9c74124..7b66b98be5 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -70,9 +70,9 @@ jobs: cp toolchains/ci_workspaces/WORKSPACE.x86_64 WORKSPACE cd py echo $(which python3) - echo $(python3 -c 'import site; print(site.getsitepackages())') + echo $(python3 -c 'import site; print(site.getsitepackages()[0])') mkdir -p /opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/ - ln -s $(python3 -c 'import site; print(site.getsitepackages())') /opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages + ln -s $(python3 -c 'import site; print(site.getsitepackages()[0])') /opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages python3 setup.py install cd .. - name: Generate New Docs From 0005137d42d4603ecd9a57d6d0071ac21520273d Mon Sep 17 00:00:00 2001 From: Torch-TensorRT Github Bot Date: Mon, 22 Aug 2022 23:38:33 +0000 Subject: [PATCH 30/31] docs: [Automated] Regenerating documenation for 0baf50a Signed-off-by: Torch-TensorRT Github Bot --- .../classtorch__tensorrt_1_1DataType.html | 172 ++- ...rch__tensorrt_1_1Device_1_1DeviceType.html | 170 ++- .../classtorch__tensorrt_1_1TensorFormat.html | 172 ++- ...ensorrt_1_1ptq_1_1Int8CacheCalibrator.html | 170 ++- ...ch__tensorrt_1_1ptq_1_1Int8Calibrator.html | 170 ++- ...8h_1a18d295a837ac71add5578860b55e5502.html | 170 ++- ...8h_1a282fd3c0b1c3a215148ae372070e1268.html | 170 ++- ...8h_1a31398a6d4d27e28817afb0f0139e909e.html | 170 ++- ...8h_1a35703561b26b1a9d2738ad7d58b27827.html | 170 ++- ...8h_1abd1465eb38256d3f22cc1426b23d516b.html | 170 ++- ...8h_1abe87b341f562fd1cf40b7672e4d759da.html | 174 ++- ...8h_1ad19939408f7be171a74a89928b36eb59.html | 170 ++- ...8h_1adad592a7b1b7eed529cdf6acd584c883.html | 170 ++- docs/_cpp_api/dir_cpp.html | 158 ++- docs/_cpp_api/dir_cpp_include.html | 158 ++- .../dir_cpp_include_torch_tensorrt.html | 158 ++- ...8h_1a130f65408ad8cbaee060f05e8db69558.html | 170 ++- ...8h_1a3fbe5d72e4fc624dbd038853079620eb.html | 170 ++- ..._cpp_include_torch_tensorrt_logging.h.html | 158 ++- ...e_cpp_include_torch_tensorrt_macros.h.html | 159 ++- ...file_cpp_include_torch_tensorrt_ptq.h.html | 163 ++- ...clude_torch_tensorrt_torch_tensorrt.h.html | 160 +-- ...8h_1a0593f776f469c20469e2f729fc7861a3.html | 170 ++- ...8h_1a0c012cb374addd90eb1f42eaec570650.html | 170 ++- ...8h_1a56e110feaaba2c3fd44bd201fd21a76a.html | 170 ++- ...8h_1a7cb50492421ea9de4e3db895819df6f2.html | 174 ++- ...8h_1ac46ac0901cb97e3ae6e93b45f24e90b8.html | 170 ++- ...8h_1ad2efd47b6c3689e58ccc595680579ae5.html | 170 ++- ...8h_1af8f3443813315af7901903d25dd495cc.html | 170 ++- ...8h_1a226e3c83379d1012cde8578c1c86b16c.html | 674 +++++++++++ ...8h_1a6186e305f47c1d94b6130ef6c7f7e178.html | 680 +++++++++++ ...8h_1a5b405fd3bf3c8fc2e2a54cbbab979797.html | 170 ++- ...8h_1a6e19490a08fb1553c9dd347a5ae79db9.html | 170 ++- ...8h_1a710df824a7718b440e4bc17bf9693cef.html | 170 ++- ...8h_1ac4ab8313ae72c2c899ea31548b528528.html | 170 ++- ...8h_1ad1acd06eaeaffbbcf6e7ebf426891384.html | 174 ++- ...8h_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1.html | 170 ++- ...8h_1ae8d56472106eeef37fbe51ff7f40c9b2.html | 170 ++- docs/_cpp_api/namespace_torch_tensorrt.html | 171 
++- .../namespace_torch_tensorrt__logging.html | 170 ++- .../namespace_torch_tensorrt__ptq.html | 174 ++- ...namespace_torch_tensorrt__torchscript.html | 170 ++- ..._cpp_include_torch_tensorrt_logging.h.html | 158 ++- ...e_cpp_include_torch_tensorrt_macros.h.html | 162 +-- ...file_cpp_include_torch_tensorrt_ptq.h.html | 165 ++- ...clude_torch_tensorrt_torch_tensorrt.h.html | 221 ++-- .../structtorch__tensorrt_1_1Device.html | 174 ++- .../structtorch__tensorrt_1_1GraphInputs.html | 674 +++++++++++ .../structtorch__tensorrt_1_1Input.html | 212 ++-- ...ensorrt_1_1torchscript_1_1CompileSpec.html | 222 ++-- docs/_cpp_api/torch_tensort_cpp.html | 194 ++-- docs/_cpp_api/unabridged_orphan.html | 158 ++- docs/_modules/index.html | 159 +-- docs/_modules/torch_tensorrt/_Device.html | 197 ++-- docs/_modules/torch_tensorrt/_Input.html | 259 +++-- docs/_modules/torch_tensorrt/_compile.html | 324 +++--- docs/_modules/torch_tensorrt/_util.html | 173 +-- docs/_modules/torch_tensorrt/fx/fx2trt.html | 999 ++++++++++++++++ .../torch_tensorrt/fx/input_tensor_spec.html | 809 +++++++++++++ .../torch_tensorrt/fx/trt_module.html | 863 ++++++++++++++ docs/_modules/torch_tensorrt/logging.html | 176 +-- docs/_modules/torch_tensorrt/ptq.html | 260 +++-- .../torch_tensorrt/ts/_compile_spec.html | 424 ++++--- .../_modules/torch_tensorrt/ts/_compiler.html | 306 ++--- ...pp_include_torch_tensorrt_macros.h.rst.txt | 2 + ...e_cpp_include_torch_tensorrt_ptq.h.rst.txt | 6 +- ...de_torch_tensorrt_torch_tensorrt.h.rst.txt | 4 + ...1a226e3c83379d1012cde8578c1c86b16c.rst.txt | 14 + ...1a6186e305f47c1d94b6130ef6c7f7e178.rst.txt | 14 + .../_cpp_api/namespace_torch_tensorrt.rst.txt | 2 + .../namespace_torch_tensorrt__ptq.rst.txt | 4 +- ...p_include_torch_tensorrt_logging.h.rst.txt | 16 +- ...pp_include_torch_tensorrt_macros.h.rst.txt | 12 +- ...e_cpp_include_torch_tensorrt_ptq.h.rst.txt | 57 +- ...de_torch_tensorrt_torch_tensorrt.h.rst.txt | 177 +-- ...ructtorch__tensorrt_1_1GraphInputs.rst.txt | 17 + .../structtorch__tensorrt_1_1Input.rst.txt | 9 + docs/_sources/cli/torchtrtc.rst.txt | 146 +++ .../contributors/partitioning.rst.txt | 2 +- .../getting_started_with_cpp_api.rst.txt | 338 ++++++ .../getting_started_with_python_api.rst.txt | 58 + .../getting_started/installation.rst.txt | 362 ++++++ docs/_sources/index.rst.txt | 65 +- docs/_sources/py_api/fx.rst.txt | 31 + docs/_sources/py_api/ptq.rst.txt | 2 +- docs/_sources/py_api/torch_tensorrt.rst.txt | 2 +- .../docs/demo/demo.rst.txt | 4 +- .../docs/demo/lists_tables.rst.txt | 2 +- .../docs/demo/structure.rst.txt | 8 +- ...ating_torchscript_module_in_python.rst.txt | 7 +- .../getting_started_with_fx_path.rst.txt | 297 +++++ docs/_sources/tutorials/notebooks.rst.txt | 154 +++ docs/_sources/tutorials/ptq.rst.txt | 77 +- .../tutorials/use_from_pytorch.rst.txt | 12 +- docs/_sources/tutorials/using_dla.rst.txt | 5 +- docs/_static/css/theme.css | 26 + docs/cli/torchtrtc.html | 776 +++++++++++++ docs/contributors/conversion.html | 170 ++- docs/contributors/lowering.html | 170 ++- docs/contributors/partitioning.html | 170 ++- docs/contributors/phases.html | 158 ++- docs/contributors/runtime.html | 170 ++- docs/contributors/system_overview.html | 172 ++- docs/contributors/useful_links.html | 168 ++- docs/contributors/writing_converters.html | 168 ++- docs/genindex.html | 205 ++-- .../getting_started_with_cpp_api.html | 935 +++++++++++++++ .../getting_started_with_python_api.html | 689 +++++++++++ docs/getting_started/installation.html | 1012 +++++++++++++++++ docs/index.html | 207 ++-- 
docs/indices/supported_ops.html | 166 ++- docs/objects.inv | Bin 24856 -> 24827 bytes docs/py-modindex.html | 161 +-- docs/py_api/fx.html | 691 +++++++++++ docs/py_api/logging.html | 170 ++- docs/py_api/ptq.html | 170 ++- docs/py_api/torch_tensorrt.html | 173 ++- docs/py_api/ts.html | 219 ++-- docs/search.html | 164 ++- docs/searchindex.js | 2 +- .../pytorch-sphinx-theme/docs/changelog.html | 158 ++- .../docs/configuring.html | 158 ++- .../pytorch-sphinx-theme/docs/demo/api.html | 158 ++- .../pytorch-sphinx-theme/docs/demo/demo.html | 160 ++- .../docs/demo/lists_tables.html | 158 ++- .../pytorch-sphinx-theme/docs/demo/long.html | 158 ++- .../docs/demo/structure.html | 158 ++- docs/src/pytorch-sphinx-theme/docs/index.html | 158 ++- .../pytorch-sphinx-theme/docs/installing.html | 158 ++- ...creating_torchscript_module_in_python.html | 251 ++-- .../getting_started_with_fx_path.html | 912 +++++++++++++++ docs/tutorials/notebooks.html | 795 +++++++++++++ docs/tutorials/ptq.html | 253 ++--- docs/tutorials/runtime.html | 178 ++- .../serving_torch_tensorrt_with_triton.html | 176 ++- docs/tutorials/use_from_pytorch.html | 190 ++-- docs/tutorials/using_dla.html | 181 ++- 137 files changed, 20507 insertions(+), 8259 deletions(-) create mode 100644 docs/_cpp_api/function_ptq_8h_1a226e3c83379d1012cde8578c1c86b16c.html create mode 100644 docs/_cpp_api/function_ptq_8h_1a6186e305f47c1d94b6130ef6c7f7e178.html create mode 100644 docs/_cpp_api/structtorch__tensorrt_1_1GraphInputs.html create mode 100644 docs/_modules/torch_tensorrt/fx/fx2trt.html create mode 100644 docs/_modules/torch_tensorrt/fx/input_tensor_spec.html create mode 100644 docs/_modules/torch_tensorrt/fx/trt_module.html create mode 100644 docs/_sources/_cpp_api/function_ptq_8h_1a226e3c83379d1012cde8578c1c86b16c.rst.txt create mode 100644 docs/_sources/_cpp_api/function_ptq_8h_1a6186e305f47c1d94b6130ef6c7f7e178.rst.txt create mode 100644 docs/_sources/_cpp_api/structtorch__tensorrt_1_1GraphInputs.rst.txt create mode 100644 docs/_sources/cli/torchtrtc.rst.txt create mode 100644 docs/_sources/getting_started/getting_started_with_cpp_api.rst.txt create mode 100644 docs/_sources/getting_started/getting_started_with_python_api.rst.txt create mode 100644 docs/_sources/getting_started/installation.rst.txt create mode 100644 docs/_sources/py_api/fx.rst.txt create mode 100644 docs/_sources/tutorials/getting_started_with_fx_path.rst.txt create mode 100644 docs/_sources/tutorials/notebooks.rst.txt create mode 100644 docs/cli/torchtrtc.html create mode 100644 docs/getting_started/getting_started_with_cpp_api.html create mode 100644 docs/getting_started/getting_started_with_python_api.html create mode 100644 docs/getting_started/installation.html create mode 100644 docs/py_api/fx.html create mode 100644 docs/tutorials/getting_started_with_fx_path.html create mode 100644 docs/tutorials/notebooks.html diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html b/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html index 64a3a981be..222e3ae811 100644 --- a/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html +++ b/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html @@ -9,22 +9,22 @@ - + Class DataType — Torch-TensorRT master documentation + + + + + + + + + - - - - - - - - - - + @@ -36,11 +36,11 @@ - + + - - + @@ -180,9 +180,9 @@ + - - +