
Commit fff1b80

Committed Mar 20, 2024
chore: Upgrade to TRT 10.0
chore: updates to trt api
chore: trt 10 fixes
chore: more fixes
1 parent 4ae6ab9 commit fff1b80

File tree

20 files changed: +205 additions, -182 deletions


‎core/conversion/converters/impl/constant_pad.cpp

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ auto constant_pad_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns
     util::toDims(c10::IntArrayRef(stride)));
 TORCHTRT_CHECK(slice_layer, "Unable to create slice layer from node: " << *n);
 slice_layer->setName((util::node_info(n) + "_slice").c_str());
-slice_layer->setMode(nvinfer1::SliceMode::kFILL);
+slice_layer->setMode(nvinfer1::SampleMode::kFILL);
 slice_layer->setInput(4, *value_itensor);

 if (ctx->input_is_dynamic) {
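TensorRT 10 renames the slice-layer mode enum: nvinfer1::SliceMode becomes nvinfer1::SampleMode, with the same kFILL/kCLAMP/kREFLECT/kWRAP values, and the Python binding mirrors this as trt.SampleMode. A minimal sketch of the new spelling in Python, assuming a network `net` and an input ITensor `inp` already exist (both names are placeholders):

import tensorrt as trt

# `net` (INetworkDefinition) and `inp` (ITensor) are assumed to exist.
pad_layer = net.add_slice(inp, (0, 0), (6, 6), (1, 1))  # start, shape, stride
pad_layer.mode = trt.SampleMode.FILL  # formerly trt.SliceMode.FILL
# The fill value is supplied as the slice layer's fifth input (index 4),
# matching the C++ setInput(4, *value_itensor) call in the hunk above.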

‎core/conversion/converters/impl/conv_deconv.cpp

Lines changed: 3 additions & 3 deletions
@@ -61,7 +61,7 @@ nvinfer1::ILayer* add_bias_layer(
 auto* sliceLayer = ctx->net->addSlice(*input_tensor, dummy, dummy, stride);
 sliceLayer->setInput(1, *start);
 sliceLayer->setInput(2, *size);
-sliceLayer->setMode(nvinfer1::SliceMode::kFILL);
+sliceLayer->setMode(nvinfer1::SampleMode::kFILL);
 nvinfer1::ITensor* slice_output = sliceLayer->getOutput(0);

 nvinfer1::Dims constantDims;
@@ -194,7 +194,7 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
 nvinfer1::IConvolutionLayer* convLayer =
     ctx->net->addConvolutionNd(*in, num_output_maps, filter_dim, kernel_weights, bias.data);
 convLayer->setStrideNd(stride);
-convLayer->setPaddingMode(nvinfer1::PaddingMode::kCAFFE_ROUND_DOWN);
+convLayer->setPaddingMode(nvinfer1::PaddingMode::kEXPLICIT_ROUND_DOWN);
 convLayer->setPaddingNd(padding);
 convLayer->setPostPadding(out_padding);
 convLayer->setDilationNd(dilation);
@@ -293,7 +293,7 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
 TORCHTRT_CHECK(conv, "Unable to create convolution layer from node: " << *n);

 conv->setStrideNd(stride);
-conv->setPaddingMode(nvinfer1::PaddingMode::kCAFFE_ROUND_DOWN);
+conv->setPaddingMode(nvinfer1::PaddingMode::kEXPLICIT_ROUND_DOWN);
 conv->setPaddingNd(padding);
 conv->setPostPadding(out_padding);
 conv->setDilationNd(dilation);
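The Caffe-style rounding modes (kCAFFE_ROUND_DOWN / kCAFFE_ROUND_UP) are removed in TensorRT 10, so the converter now requests kEXPLICIT_ROUND_DOWN and keeps passing explicit pre- and post-padding. A rough Python equivalent of the new layer settings, assuming `net`, an input tensor `inp`, and a placeholder OIHW weight array `w`:

import numpy as np
import tensorrt as trt

# `net` and `inp` are assumed to exist; `w` is a placeholder 8x3x3x3 kernel.
w = np.ones((8, 3, 3, 3), dtype=np.float32)
conv = net.add_convolution_nd(inp, 8, (3, 3), trt.Weights(w))
conv.stride_nd = (1, 1)
conv.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_DOWN  # CAFFE_ROUND_DOWN is gone in TRT 10
conv.padding_nd = (1, 1)
conv.dilation_nd = (1, 1)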

‎core/conversion/converters/impl/interpolate.cpp

Lines changed: 25 additions & 25 deletions
@@ -72,7 +72,7 @@ void resize_layer_size(
 nvinfer1::ITensor* in,
 std::vector<int64_t> out_shape,
 std::vector<float> scales,
-nvinfer1::ResizeMode mode,
+nvinfer1::InterpolationMode mode,
 bool align_corners = false) {
 TORCHTRT_CHECK((out_shape.size() > 0) ^ (scales.size() > 0), "only one of out_shape or scales should be defined");
 auto resize_layer = ctx->net->addResize(*in);
@@ -141,7 +141,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =
 float scale = args[2].IValue()->toDouble();
 std::vector<float> padded_scales(in_shape.size(), 1);
 padded_scales[padded_scales.size() - 1] = scale;
-resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::ResizeMode::kNEAREST);
+resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::InterpolationMode::kNEAREST);
 } else {
 // Case 2: user uses output size
 auto out_size = util::toVec(util::toDims(args[1].unwrapToIntList()));
@@ -150,7 +150,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =

 auto out_shape = in_shape;
 std::copy(out_size.begin(), out_size.end(), out_shape.begin() + (in_shape.size() - out_size.size()));
-resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::ResizeMode::kNEAREST);
+resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::InterpolationMode::kNEAREST);
 }

 return true;
@@ -172,7 +172,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =
 float scale = scale_factors[0];
 std::vector<float> padded_scales(in_shape.size(), 1);
 padded_scales[padded_scales.size() - 1] = scale;
-resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::ResizeMode::kNEAREST);
+resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::InterpolationMode::kNEAREST);
 } else {
 // Case 2: user uses output size
 auto out_size = util::toVec(util::toDims(args[1].unwrapToIntList()));
@@ -181,7 +181,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =

 auto out_shape = in_shape;
 std::copy(out_size.begin(), out_size.end(), out_shape.begin() + (in_shape.size() - out_size.size()));
-resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::ResizeMode::kNEAREST);
+resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::InterpolationMode::kNEAREST);
 }

 return true;
@@ -203,7 +203,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =
 std::vector<float> padded_scales(in_shape.size(), 1);
 padded_scales[padded_scales.size() - 2] = scale_h;
 padded_scales[padded_scales.size() - 1] = scale_w;
-resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::ResizeMode::kNEAREST);
+resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::InterpolationMode::kNEAREST);
 } else {
 // Case 2: user uses output size
 auto out_size = util::toVec(util::toDims(args[1].unwrapToIntList()));
@@ -212,7 +212,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =

 auto out_shape = in_shape;
 std::copy(out_size.begin(), out_size.end(), out_shape.begin() + (in_shape.size() - out_size.size()));
-resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::ResizeMode::kNEAREST);
+resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::InterpolationMode::kNEAREST);
 }

 return true;
@@ -236,7 +236,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =
 std::vector<float> padded_scales(in_shape.size(), 1);
 padded_scales[padded_scales.size() - 2] = scale_h;
 padded_scales[padded_scales.size() - 1] = scale_w;
-resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::ResizeMode::kNEAREST);
+resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::InterpolationMode::kNEAREST);
 } else {
 // Case 2: user uses output size
 auto out_size = util::toVec(util::toDims(args[1].unwrapToIntList()));
@@ -245,7 +245,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =

 auto out_shape = in_shape;
 std::copy(out_size.begin(), out_size.end(), out_shape.begin() + (in_shape.size() - out_size.size()));
-resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::ResizeMode::kNEAREST);
+resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::InterpolationMode::kNEAREST);
 }

 return true;
@@ -270,7 +270,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =
 padded_scales[padded_scales.size() - 3] = scale_d;
 padded_scales[padded_scales.size() - 2] = scale_h;
 padded_scales[padded_scales.size() - 1] = scale_w;
-resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::ResizeMode::kNEAREST);
+resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::InterpolationMode::kNEAREST);
 } else {
 // Case 2: user uses output size
 auto out_size = util::toVec(util::toDims(args[1].unwrapToIntList()));
@@ -279,7 +279,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =

 auto out_shape = in_shape;
 std::copy(out_size.begin(), out_size.end(), out_shape.begin() + (in_shape.size() - out_size.size()));
-resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::ResizeMode::kNEAREST);
+resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::InterpolationMode::kNEAREST);
 }

 return true;
@@ -306,7 +306,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =
 padded_scales[padded_scales.size() - 3] = scale_d;
 padded_scales[padded_scales.size() - 2] = scale_h;
 padded_scales[padded_scales.size() - 1] = scale_w;
-resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::ResizeMode::kNEAREST);
+resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::InterpolationMode::kNEAREST);
 } else {
 // Case 2: user uses output size
 auto out_size = util::toVec(util::toDims(args[1].unwrapToIntList()));
@@ -315,7 +315,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =

 auto out_shape = in_shape;
 std::copy(out_size.begin(), out_size.end(), out_shape.begin() + (in_shape.size() - out_size.size()));
-resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::ResizeMode::kNEAREST);
+resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::InterpolationMode::kNEAREST);
 }

 return true;
@@ -336,7 +336,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =
 float scale = args[3].IValue()->toDouble();
 std::vector<float> padded_scales(in_shape.size(), 1);
 padded_scales[padded_scales.size() - 1] = scale;
-resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::ResizeMode::kLINEAR, align_corners);
+resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::InterpolationMode::kLINEAR, align_corners);
 } else {
 // Case 2: user uses output size
 auto out_size = util::toVec(util::toDims(args[1].unwrapToIntList()));
@@ -345,7 +345,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =

 auto out_shape = in_shape;
 std::copy(out_size.begin(), out_size.end(), out_shape.begin() + (in_shape.size() - out_size.size()));
-resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::ResizeMode::kLINEAR, align_corners);
+resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::InterpolationMode::kLINEAR, align_corners);
 }

 return true;
@@ -368,7 +368,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =
 float scale = scale_factors[0];
 std::vector<float> padded_scales(in_shape.size(), 1);
 padded_scales[padded_scales.size() - 1] = scale;
-resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::ResizeMode::kLINEAR, align_corners);
+resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::InterpolationMode::kLINEAR, align_corners);
 } else {
 // Case 2: user uses output size
 auto out_size = util::toVec(util::toDims(args[1].unwrapToIntList()));
@@ -377,7 +377,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =

 auto out_shape = in_shape;
 std::copy(out_size.begin(), out_size.end(), out_shape.begin() + (in_shape.size() - out_size.size()));
-resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::ResizeMode::kLINEAR, align_corners);
+resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::InterpolationMode::kLINEAR, align_corners);
 }

 return true;
@@ -400,7 +400,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =
 std::vector<float> padded_scales(in_shape.size(), 1);
 padded_scales[padded_scales.size() - 2] = scale_h;
 padded_scales[padded_scales.size() - 1] = scale_w;
-resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::ResizeMode::kLINEAR, align_corners);
+resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::InterpolationMode::kLINEAR, align_corners);
 } else {
 // Case 2: user uses output size
 auto out_size = util::toVec(util::toDims(args[1].unwrapToIntList()));
@@ -410,7 +410,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =

 auto out_shape = in_shape;
 std::copy(out_size.begin(), out_size.end(), out_shape.begin() + (in_shape.size() - out_size.size()));
-resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::ResizeMode::kLINEAR, align_corners);
+resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::InterpolationMode::kLINEAR, align_corners);
 }

 return true;
@@ -435,7 +435,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =
 std::vector<float> padded_scales(in_shape.size(), 1);
 padded_scales[padded_scales.size() - 2] = scale_h;
 padded_scales[padded_scales.size() - 1] = scale_w;
-resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::ResizeMode::kLINEAR, align_corners);
+resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::InterpolationMode::kLINEAR, align_corners);
 } else {
 // Case 2: user uses output size
 auto out_size = util::toVec(util::toDims(args[1].unwrapToIntList()));
@@ -445,7 +445,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =

 auto out_shape = in_shape;
 std::copy(out_size.begin(), out_size.end(), out_shape.begin() + (in_shape.size() - out_size.size()));
-resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::ResizeMode::kLINEAR, align_corners);
+resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::InterpolationMode::kLINEAR, align_corners);
 }

 return true;
@@ -470,7 +470,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =
 padded_scales[padded_scales.size() - 3] = scale_d;
 padded_scales[padded_scales.size() - 2] = scale_h;
 padded_scales[padded_scales.size() - 1] = scale_w;
-resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::ResizeMode::kLINEAR, align_corners);
+resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::InterpolationMode::kLINEAR, align_corners);
 } else {
 // Case 2: user uses output size
 auto out_size = util::toVec(util::toDims(args[1].unwrapToIntList()));
@@ -480,7 +480,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =

 auto out_shape = in_shape;
 std::copy(out_size.begin(), out_size.end(), out_shape.begin() + (in_shape.size() - out_size.size()));
-resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::ResizeMode::kLINEAR, align_corners);
+resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::InterpolationMode::kLINEAR, align_corners);
 }

 return true;
@@ -507,7 +507,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =
 padded_scales[padded_scales.size() - 3] = scale_d;
 padded_scales[padded_scales.size() - 2] = scale_h;
 padded_scales[padded_scales.size() - 1] = scale_w;
-resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::ResizeMode::kLINEAR, align_corners);
+resize_layer_size(ctx, n, in, {}, padded_scales, nvinfer1::InterpolationMode::kLINEAR, align_corners);
 } else {
 // Case 2: user uses output size
 auto out_size = util::toVec(util::toDims(args[1].unwrapToIntList()));
@@ -517,7 +517,7 @@ auto interpolate_registrations TORCHTRT_UNUSED =

 auto out_shape = in_shape;
 std::copy(out_size.begin(), out_size.end(), out_shape.begin() + (in_shape.size() - out_size.size()));
-resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::ResizeMode::kLINEAR, align_corners);
+resize_layer_size(ctx, n, in, out_shape, {}, nvinfer1::InterpolationMode::kLINEAR, align_corners);
 }

 return true;
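The other pervasive rename in this commit is the resize enum: nvinfer1::ResizeMode becomes nvinfer1::InterpolationMode (trt.InterpolationMode in Python), while kNEAREST and kLINEAR keep their meaning. A small sketch of the Python spelling, again assuming placeholder `net` and `inp` objects:

import tensorrt as trt

# `net` and `inp` are assumed to exist; upscale an NCHW tensor 2x in H and W.
resize = net.add_resize(inp)
resize.scales = [1.0, 1.0, 2.0, 2.0]
resize.resize_mode = trt.InterpolationMode.NEAREST  # formerly trt.ResizeMode.NEAREST
# For the align_corners=True linear paths above, the coordinate transform is set separately:
# resize.coordinate_transformation = trt.ResizeCoordinateTransformation.ALIGN_CORNERS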

‎core/conversion/converters/impl/linear.cpp

Lines changed: 17 additions & 13 deletions
@@ -40,22 +40,26 @@ auto linear_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns().pat
 in = in_shuffle->getOutput(0);
 }

-auto w_tensor = args[1].IValue()->toTensor();
-Weights w = Weights(ctx, w_tensor);
+// Convert w_tensor to ITensor
+auto weight = args[1].IValue()->toTensor();
+auto weight_tensor = tensor_to_const(ctx, weight, util::node_info(n) + "_weight");
+auto mm_layer = ctx->net->addMatrixMultiply(
+    *in, nvinfer1::MatrixOperation::kNONE, *weight_tensor, nvinfer1::MatrixOperation::kNONE);

-nvinfer1::ILayer* new_layer;
-if (!args[2].IValue()->isNone()) {
-  Weights b(ctx, args[2].IValue()->toTensor());
-  new_layer = ctx->net->addFullyConnected(*in, w.num_output_maps, w.data, b.data);
-} else {
-  LOG_DEBUG("There is no bias for the linear layer");
-  new_layer = ctx->net->addFullyConnected(*in, w.num_output_maps, w.data, Weights().data);
-}
+TORCHTRT_CHECK(mm_layer, "Unable to create linear layer from node: " << *n);
+mm_layer->setName(util::node_info(n).c_str());

-TORCHTRT_CHECK(new_layer, "Unable to create linear layer from node: " << *n);
+auto mm_output = mm_layer->getOutput(0);

-new_layer->setName(util::node_info(n).c_str());
-auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0));
+if (!args[2].IValue()->isNone()) {
+  // Convert bias to ITensor
+  auto bias = args[2].IValue()->toTensor();
+  auto bias_tensor = tensor_to_const(ctx, bias, util::node_info(n) + "_bias");
+  auto bias_add_layer = add_elementwise(
+      ctx, nvinfer1::ElementWiseOperation::kSUM, mm_output, bias_tensor, util::node_info(n) + "_bias_add");
+  mm_output = bias_add_layer->getOutput(0);
+}
+auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], mm_output);

 LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
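IFullyConnectedLayer (addFullyConnected) no longer exists in TensorRT 10, so aten::linear is lowered to a matrix multiply plus an optional elementwise bias add, as in the hunk above. A condensed Python sketch of the same pattern; `net` and `inp` are placeholders, and the weight is assumed to already be laid out so that no transpose operation is needed:

import numpy as np
import tensorrt as trt

# Placeholder (in_features, out_features) weight and (out_features,) bias.
weight = np.random.rand(32, 16).astype(np.float32)
bias = np.random.rand(16).astype(np.float32)

w_const = net.add_constant(weight.shape, trt.Weights(weight)).get_output(0)
mm = net.add_matrix_multiply(
    inp, trt.MatrixOperation.NONE, w_const, trt.MatrixOperation.NONE
)
b_const = net.add_constant((1, 16), trt.Weights(bias)).get_output(0)
out = net.add_elementwise(
    mm.get_output(0), b_const, trt.ElementWiseOperation.SUM
).get_output(0)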

‎core/runtime/TRTEngine.cpp

Lines changed: 35 additions & 12 deletions
@@ -120,16 +120,25 @@ TRTEngine::TRTEngine(
 } else {
   uint64_t inputs_size = _in_binding_names.size();
   in_binding_names.resize(inputs_size);
-  for (size_t pyt_idx = 0; pyt_idx < inputs_size; pyt_idx++) {
+  for (uint64_t pyt_idx = 0; pyt_idx < inputs_size; pyt_idx++) {
     auto binding_name = _in_binding_names[pyt_idx];
-    auto trt_idx = cuda_engine->getBindingIndex(binding_name.c_str());
-    std::string engine_binded_name = cuda_engine->getIOTensorName(trt_idx);
-    TORCHTRT_CHECK(
-        (binding_name == engine_binded_name),
-        "Could not find a TensorRT engine binding for input named " << binding_name);
+    // Check if the binding name provided is in the list of engine's bindings
+    // by iterating through nbIOTensors and verify it is an input binding
+    bool is_binding = false, is_input = false;
+    int32_t trt_idx;
+    for (int32_t idx = 0; idx < cuda_engine->getNbIOTensors(); idx++) {
+      std::string curr_bind_name = cuda_engine->getIOTensorName(idx);
+      if (curr_bind_name == binding_name) {
+        is_binding = true;
+        trt_idx = idx;
+        if (cuda_engine->getTensorIOMode(binding_name.c_str()) == nvinfer1::TensorIOMode::kINPUT) {
+          is_input = true;
+        }
+      }
+    }
+    TORCHTRT_CHECK(is_binding, "Could not find a TensorRT engine binding for input named " << binding_name);
     TORCHTRT_CHECK(
-        (cuda_engine->getTensorIOMode(binding_name.c_str()) == nvinfer1::TensorIOMode::kINPUT),
-        "Binding " << binding_name << " specified as input but found as output in TensorRT engine");
+        is_input, "Binding " << binding_name << " specified as input but found as output in TensorRT engine");
     LOG_DEBUG(
         "Input binding name: " << binding_name << " has TensorRT binding index: " << trt_idx
                                << ", Torch binding index: " << pyt_idx);
@@ -141,11 +150,25 @@ TRTEngine::TRTEngine(
   out_binding_names.resize(outputs);
   for (size_t pyt_idx = 0; pyt_idx < outputs; pyt_idx++) {
     auto binding_name = _out_binding_names[pyt_idx];
-    auto trt_idx = cuda_engine->getBindingIndex(binding_name.c_str());
-    TORCHTRT_CHECK((trt_idx != -1), "Could not find a TensorRT engine binding for output named " << binding_name);
+    // Check if the binding name provided is in the list of engine's bindings
+    // by iterating through nbIOTensors and verify it is an output binding
+    bool is_binding = false, is_output = false;
+    int32_t trt_idx;
+    for (int32_t idx = 0; idx < cuda_engine->getNbIOTensors(); idx++) {
+      std::string curr_bind_name = cuda_engine->getIOTensorName(idx);
+      if (curr_bind_name == binding_name) {
+        is_binding = true;
+        trt_idx = idx;
+        if (cuda_engine->getTensorIOMode(binding_name.c_str()) == nvinfer1::TensorIOMode::kOUTPUT) {
+          is_output = true;
+        }
+      }
+    }
+
+    TORCHTRT_CHECK(is_binding, "Could not find a TensorRT engine binding for output named " << binding_name);
     TORCHTRT_CHECK(
-        !(cuda_engine->getTensorIOMode(binding_name.c_str()) == nvinfer1::TensorIOMode::kINPUT),
-        "Binding " << binding_name << " specified as output but found as input in TensorRT engine");
+        is_output, "Binding " << binding_name << " specified as output but found as input in TensorRT engine");
+
     LOG_DEBUG(
         "Output binding name: " << binding_name << " has TensorRT binding index: " << trt_idx
                                 << ", Torch binding index: " << inputs_size + pyt_idx);
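getBindingIndex() and the other binding-index APIs are gone in TRT 10, which is why the runtime now scans the engine's IO tensors by name and checks each tensor's IO mode. A simplified Python variant of that lookup, assuming a deserialized `engine`:

import tensorrt as trt

def find_io_tensor_index(engine: trt.ICudaEngine, binding_name: str) -> int:
    """Return the IO-tensor index whose name matches, or -1 if absent."""
    for idx in range(engine.num_io_tensors):
        if engine.get_tensor_name(idx) == binding_name:
            return idx
    return -1

# The input/output check is a separate query on the tensor name:
# is_input = engine.get_tensor_mode(binding_name) == trt.TensorIOMode.INPUT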

‎cpp/include/torch_tensorrt/ptq.h

Lines changed: 4 additions & 4 deletions
@@ -21,10 +21,10 @@
 #include "torch_tensorrt/macros.h"

 #ifndef DOXYGEN_SHOULD_SKIP_THIS
-namespace nvinfer1 {
-class IInt8Calibrator;
-class IInt8EntropyCalibrator2;
-} // namespace nvinfer1
+// namespace nvinfer1 {
+// class IInt8Calibrator;
+// class IInt8EntropyCalibrator2;
+// } // namespace nvinfer1

 namespace torch_tensorrt {
 namespace ptq {

‎py/torch_tensorrt/csrc/torch_tensorrt_py.cpp

Lines changed: 5 additions & 0 deletions
@@ -2,6 +2,7 @@
 #include "pybind11/stl.h"

 #include "ATen/core/jit_type.h"
+#include "NvInferRuntimeBase.h"
 #include "Python.h"
 #include "core/compiler.h"
 #include "core/conversion/conversion.h"
@@ -77,6 +78,10 @@ class pyIInt8Calibrator : public pyCalibratorTrampoline<nvinfer1::IInt8Calibrato
   using Derived = pyCalibratorTrampoline<nvinfer1::IInt8Calibrator>;
   using Derived::Derived;

+  nvinfer1::InterfaceInfo getInterfaceInfo() const noexcept override {
+    return nvinfer1::InterfaceInfo{"PYTHON CALIBRATOR", 1, 0};
+  }
+
   nvinfer1::CalibrationAlgoType getAlgorithm() noexcept override {
     try {
       PYBIND11_OVERLOAD_PURE_NAME(

‎py/torch_tensorrt/dynamo/_compiler.py

Lines changed: 1 addition & 1 deletion
@@ -634,7 +634,7 @@ def convert_module_to_trt_engine(
     import io

     with io.BytesIO() as engine_bytes:
-        engine_bytes.write(interpreter_result.engine.serialize())
+        engine_bytes.write(interpreter_result.engine)
         engine_bytearray = engine_bytes.getvalue()

     return engine_bytearray

‎py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py

Lines changed: 3 additions & 3 deletions
@@ -172,7 +172,7 @@ def run(

         if version.parse(trt.__version__) >= version.parse("8.2"):
             builder_config.profiling_verbosity = (
-                trt.ProfilingVerbosity.VERBOSE
+                trt.ProfilingVerbosity.DETAILED
                 if self.compilation_settings.debug
                 else trt.ProfilingVerbosity.LAYER_NAMES_ONLY
             )
@@ -252,7 +252,7 @@ def run(
         if tactic_sources is not None:
            builder_config.set_tactic_sources(tactic_sources=tactic_sources)

-        engine = self.builder.build_engine(self.ctx.net, builder_config)
+        engine = self.builder.build_serialized_network(self.ctx.net, builder_config)
         assert engine

         serialized_cache = (
@@ -263,7 +263,7 @@ def run(
         _LOGGER.info(
             f"Build TRT engine elapsed time: {datetime.now() - build_engine_start_time}"
         )
-        _LOGGER.info(f"TRT Engine uses: {engine.device_memory_size} bytes of Memory")
+        _LOGGER.info(f"TRT Engine uses: {engine.nbytes} bytes of Memory")

         return TRTInterpreterResult(
             engine, self._input_names, self._output_names, serialized_cache
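build_engine() was removed in TRT 10: build_serialized_network() returns an IHostMemory blob (hence the engine.nbytes size log), and a trt.Runtime deserializes it only when an executable engine is actually needed. A minimal sketch of that flow, assuming `network` and `builder_config` were created from the same `builder` elsewhere:

import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
# `network` and `builder_config` are assumed to have been built from `builder`.
serialized = builder.build_serialized_network(network, builder_config)  # IHostMemory
print(f"serialized engine size: {serialized.nbytes} bytes")

runtime = trt.Runtime(logger)
engine = runtime.deserialize_cuda_engine(serialized)  # ICudaEngine, when execution is needed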

‎py/torch_tensorrt/dynamo/conversion/_conversion.py

Lines changed: 2 additions & 1 deletion
@@ -87,8 +87,9 @@ def convert_module(
     from torch_tensorrt.dynamo.runtime import TorchTensorRTModule

     with io.BytesIO() as engine_bytes:
-        engine_bytes.write(interpreter_result.engine.serialize())
+        engine_bytes.write(interpreter_result.engine)
         engine_str = engine_bytes.getvalue()
+
     return TorchTensorRTModule(
         serialized_engine=engine_str,
         name=name,

‎py/torch_tensorrt/dynamo/conversion/impl/conv.py

Lines changed: 1 addition & 1 deletion
@@ -63,7 +63,7 @@ def convNd(
         )

     # Process weight terms
-    if ctx.net.has_explicit_precision or isinstance(weight, TRTTensor):
+    if isinstance(weight, TRTTensor):
         weight = get_trt_tensor(ctx, weight, f"{name}_weight")
         # Append new dimension (unsqueeze) if the convolution is 1d
         if is_conv1d:

‎py/torch_tensorrt/dynamo/conversion/impl/deconv.py

Lines changed: 1 addition & 1 deletion
@@ -63,7 +63,7 @@ def deconvNd(
         )

     # Process weight terms
-    if ctx.net.has_explicit_precision or isinstance(weight, TRTTensor):
+    if isinstance(weight, TRTTensor):
         weight = get_trt_tensor(ctx, weight, f"{name}_weight")
         # Append new dimension (unsqueeze) if the deconvolution is 1d
         if is_deconv1d:

‎py/torch_tensorrt/dynamo/conversion/impl/elementwise/base.py

Lines changed: 0 additions & 11 deletions
@@ -147,17 +147,6 @@ def convert_binary_elementwise(
         ctx, rhs_val, trt_promoted_type, name, target, source_ir
     )

-    # Check the limitation in the doc string.
-    if ctx.net.has_implicit_batch_dimension:
-        if is_lhs_trt_tensor and not is_rhs_trt_tensor:
-            assert len(lhs_val.shape) >= len(
-                rhs_val.shape
-            ), f"{lhs_val.shape} >= {rhs_val.shape}"
-        elif not is_lhs_trt_tensor and is_rhs_trt_tensor:
-            assert len(rhs_val.shape) >= len(
-                lhs_val.shape
-            ), f"{rhs_val.shape} >= {lhs_val.shape}"
-
     lhs_val, rhs_val = broadcast(
         ctx.net, lhs_val, rhs_val, f"{name}_lhs", f"{name}_rhs"
     )
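The deleted block only ran for implicit-batch networks, which TensorRT 10 no longer supports: every network is explicit-batch, so ctx.net.has_implicit_batch_dimension can never be true and broadcasting is handled uniformly. For reference, a network has been created in explicit-batch mode like this since earlier TRT releases (the creation flag is deprecated in TRT 10 because explicit batch is the only behavior left):

import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
# EXPLICIT_BATCH is effectively the only mode in TRT 10; the flag remains for older versions.
flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(flags)
# network.has_implicit_batch_dimension is always False here.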

‎py/torch_tensorrt/dynamo/conversion/impl/pad.py

Lines changed: 4 additions & 4 deletions
@@ -53,7 +53,7 @@ def constant_padNd(
     )
     value_const = get_trt_tensor(ctx, value, f"{name}_value", input.dtype)
     layer.set_input(4, value_const)
-    layer.mode = trt.SliceMode.FILL
+    layer.mode = trt.SampleMode.FILL

     set_layer_name(layer, target, name, source_ir)
     return layer.get_output(0)
@@ -91,7 +91,7 @@ def reflection_padNd(
         shape=tuple(new_shape),
         stride=tuple(stride_list),
     )
-    layer.mode = trt.SliceMode.REFLECT
+    layer.mode = trt.SampleMode.REFLECT

     set_layer_name(layer, target, name, source_ir)
     return layer.get_output(0)
@@ -129,7 +129,7 @@ def replication_padNd(
         shape=tuple(new_shape),
         stride=tuple(stride_list),
     )
-    layer.mode = trt.SliceMode.CLAMP
+    layer.mode = trt.SampleMode.CLAMP

     set_layer_name(layer, target, name, source_ir)
     return layer.get_output(0)
@@ -167,7 +167,7 @@ def circular_padNd(
         shape=tuple(new_shape),
         stride=tuple(stride_list),
     )
-    layer.mode = trt.SliceMode.WRAP
+    layer.mode = trt.SampleMode.WRAP

     set_layer_name(layer, target, name, source_ir)
     return layer.get_output(0)

‎py/torch_tensorrt/dynamo/conversion/impl/permutation.py

Lines changed: 2 additions & 2 deletions
@@ -66,7 +66,7 @@ def roll(
            shape=shape,
            stride=stride,
        )
-        layer.mode = trt.SliceMode.WRAP
+        layer.mode = trt.SampleMode.WRAP
         set_layer_name(layer, target, f"{name}_slice_wrap", source_ir)
         return layer.get_output(0)

@@ -83,7 +83,7 @@ def roll(
            shape=flatten_shape,
            stride=stride,
        )
-        layer.mode = trt.SliceMode.WRAP
+        layer.mode = trt.SampleMode.WRAP
         set_layer_name(layer, target, f"{name}_slice_wrap", source_ir)
         output = layer.get_output(0)
         output = impl.shuffle.reshape(

‎py/torch_tensorrt/dynamo/conversion/impl/upsample.py

Lines changed: 3 additions & 3 deletions
@@ -29,14 +29,14 @@ def upsample(
         resize_layer.scales = [1.0, 1.0] + list(scale_factors)
     else:
         raise RuntimeError(
-            f"At least one of out_shape and scale_factors should be specified."
+            "At least one of out_shape and scale_factors should be specified."
         )

     # interpolate mode
     if resize_mode == "nearest" or None:
-        resize_layer.resize_mode = trt.ResizeMode.NEAREST
+        resize_layer.resize_mode = trt.InterpolationMode.NEAREST
     elif resize_mode == "bilinear":
-        resize_layer.resize_mode = trt.ResizeMode.LINEAR
+        resize_layer.resize_mode = trt.InterpolationMode.LINEAR
         if align_corners is None or not align_corners:
             raise RuntimeError(
                 f"Interpolation works differently is align_corners is False for {resize_mode} mode in PyTorch and TensorRT."

‎py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py

Lines changed: 70 additions & 72 deletions
@@ -2,7 +2,7 @@

 import logging
 from contextlib import nullcontext
-from typing import Any, Dict, List, Optional, Sequence, Tuple
+from typing import Any, Dict, List, Optional, Tuple

 import tensorrt as trt
 import torch
@@ -55,73 +55,69 @@ def __init__(

     def _initialize(self) -> None:
         self.initialized = True
+        logger = trt.Logger()
+        runtime = trt.Runtime(logger)
+        self.engine = runtime.deserialize_cuda_engine(self.engine)
         self.context = self.engine.create_execution_context()

         # Indices of inputs/outputs in the trt engine bindings, in the order
         # as they are in the original PyTorch model.
-        self.input_binding_indices_in_order: Sequence[int] = [
-            self.engine.get_binding_index(name) for name in self.input_names
-        ]
-        self.output_binding_indices_in_order: Sequence[int] = [
-            self.engine.get_binding_index(name) for name in self.output_names
-        ]
-        primary_input_outputs = set()
-        primary_input_outputs.update(self.input_binding_indices_in_order)
-        primary_input_outputs.update(self.output_binding_indices_in_order)
-        self.hidden_output_binding_indices_in_order: Sequence[int] = []
-        self.hidden_output_names: Sequence[str] = []
-        for i in range(
-            self.engine.num_bindings // self.engine.num_optimization_profiles
-        ):
-            if i not in primary_input_outputs:
-                self.hidden_output_binding_indices_in_order.append(i)
-                self.hidden_output_names.append(self.engine.get_binding_name(i))

-        assert (self.engine.num_bindings // self.engine.num_optimization_profiles) == (
+        # TODO: Verify if the following is required especially the hidden outputs
+        # self.input_binding_indices_in_order: Sequence[int] = [
+        #     self.engine.get_binding_index(name) for name in self.input_names
+        # ]
+        # self.output_binding_indices_in_order: Sequence[int] = [
+        #     self.engine.get_binding_index(name) for name in self.output_names
+        # ]
+        # primary_input_outputs = set()
+        # primary_input_outputs.update(self.input_binding_indices_in_order)
+        # primary_input_outputs.update(self.output_binding_indices_in_order)
+        # self.hidden_output_binding_indices_in_order: Sequence[int] = []
+        # self.hidden_output_names: Sequence[str] = []
+        # for i in range(
+        #     self.engine.num_bindings // self.engine.num_optimization_profiles
+        # ):
+        #     if i not in primary_input_outputs:
+        #         self.hidden_output_binding_indices_in_order.append(i)
+        #         self.hidden_output_names.append(self.engine.get_binding_name(i))
+
+        assert (
+            self.engine.num_io_tensors // self.engine.num_optimization_profiles
+        ) == (
             len(self.input_names)
             + len(self.output_names)
-            + len(self.hidden_output_names)
+            # + len(self.hidden_output_names) #TODO: Verify if this is required
        )

        self.input_dtypes = [
            unified_dtype_converter(
-                self.engine.get_binding_dtype(idx), Frameworks.TORCH
+                self.engine.get_tensor_dtype(input_name), Frameworks.TORCH
            )
-            for idx in self.input_binding_indices_in_order
+            for input_name in self.input_names
        ]
-        self.input_shapes: Sequence[Sequence[int]] = [
-            tuple(self.engine.get_binding_shape(idx))
-            for idx in self.input_binding_indices_in_order
+        self.input_shapes = [
+            self.engine.get_tensor_shape(input_name) for input_name in self.input_names
        ]
        self.output_dtypes = [
            unified_dtype_converter(
-                self.engine.get_binding_dtype(idx), Frameworks.TORCH
+                self.engine.get_tensor_dtype(output_name), Frameworks.TORCH
            )
-            for idx in self.output_binding_indices_in_order
+            for output_name in self.output_names
        ]
        self.output_shapes = [
-            (
-                tuple(self.engine.get_binding_shape(idx))
-                if self.engine.has_implicit_batch_dimension
-                else tuple()
-            )
-            for idx in self.output_binding_indices_in_order
-        ]
-        self.hidden_output_dtypes = [
-            unified_dtype_converter(
-                self.engine.get_binding_dtype(idx), Frameworks.TORCH
-            )
-            for idx in self.hidden_output_binding_indices_in_order
-        ]
-        self.hidden_output_shapes = [
-            (
-                tuple(self.engine.get_binding_shape(idx))
-                if self.engine.has_implicit_batch_dimension
-                else tuple()
-            )
-            for idx in self.hidden_output_binding_indices_in_order
+            self.engine.get_tensor_shape(output_name)
+            for output_name in self.output_names
        ]

+        # TODO: Verify what this is for ?
+        # self.hidden_output_dtypes = [
+        #     unified_dtype_converter(
+        #         self.engine.get_binding_dtype(idx), Frameworks.TORCH
+        #     )
+        #     for idx in self.hidden_output_binding_indices_in_order
+        # ]
+
     def _check_initialized(self) -> None:
         if not self.initialized:
             raise RuntimeError("PythonTorchTensorRTModule is not initialized.")
@@ -217,12 +213,12 @@ def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, .
         ), f"Wrong number of inputs, expect {len(self.input_names)} get {len(inputs)}."

         contiguous_inputs: List[torch.Tensor] = [i.contiguous() for i in inputs]
-        bindings: List[Any] = [None] * (
-            len(self.input_names)
-            + len(self.output_names)
-            + len(self.hidden_output_names)
-        )
-
+        bindings = []
+        # [None] * (
+        #     len(self.input_names)
+        #     + len(self.output_names)
+        #     # + len(self.hidden_output_names) # TODO: Verify if this is required
+        # )
         for i, input_name in enumerate(self.input_names):
             if not contiguous_inputs[i].is_cuda:
                 logger.warning(
@@ -241,11 +237,9 @@ def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, .
                 contiguous_inputs[i].dtype == self.input_dtypes[i]
             ), f"Dtype mismatch for {i}th input({input_name}). Expect {self.input_dtypes[i]}, got {contiguous_inputs[i].dtype}."

-            idx = self.input_binding_indices_in_order[i]
-            bindings[idx] = contiguous_inputs[i].data_ptr()
-
-            self.context.set_binding_shape(
-                idx, tuple(contiguous_inputs[i].shape)
+            bindings.append(contiguous_inputs[i].data_ptr())
+            self.context.set_input_shape(
+                input_name, tuple(contiguous_inputs[i].shape)
             )

         with (
@@ -258,26 +252,32 @@ def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, .
             # create output tensors
             outputs: List[torch.Tensor] = []

-            for i, idx in enumerate(self.output_binding_indices_in_order):
-                shape = tuple(self.context.get_binding_shape(idx))
+            for i, output_name in enumerate(self.output_names):
+                shape = tuple(self.context.get_tensor_shape(output_name))

                 output = torch.empty(
                     size=shape,
                     dtype=self.output_dtypes[i],
                     device=torch.cuda.current_device(),
                 )
+                bindings.append(output.data_ptr())
                 outputs.append(output)
-                bindings[idx] = output.data_ptr()

-            for i, idx in enumerate(self.hidden_output_binding_indices_in_order):
-                shape = tuple(self.context.get_binding_shape(idx))
+            # TODO: Check what is this for ?
+            # for i, idx in enumerate(self.hidden_output_binding_indices_in_order):
+            #     shape = tuple(self.context.get_binding_shape(idx))

-                output = torch.empty(
-                    size=shape,
-                    dtype=self.hidden_output_dtypes[i],
-                    device=torch.cuda.current_device(),
-                )
-                bindings[idx] = output.data_ptr()
+            #     output = torch.empty(
+            #         size=shape,
+            #         dtype=self.hidden_output_dtypes[i],
+            #         device=torch.cuda.current_device(),
+            #     )
+
+            # Assign tensor address appropriately
+            for idx in range(self.engine.num_io_tensors):
+                self.context.set_tensor_address(
+                    self.engine.get_tensor_name(idx), bindings[idx]
+                )

         with (
             torch.autograd.profiler.record_function(
@@ -286,9 +286,7 @@ def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, .
             if self.profiling_enabled
             else nullcontext()
         ):
-            self.context.execute_async_v2(
-                bindings, torch.cuda.current_stream().cuda_stream
-            )
+            self.context.execute_async_v3(torch.cuda.current_stream().cuda_stream)

         if len(outputs) == 1:
             return outputs[0]
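The Python runtime drops the positional bindings list of execute_async_v2: in TRT 10 each IO tensor's device address is registered by name with set_tensor_address (and dynamic input shapes with set_input_shape), after which execute_async_v3 takes only the CUDA stream handle. A condensed sketch of the new call sequence, assuming a deserialized `engine` and a dict `buffers` mapping tensor names to CUDA torch tensors:

import tensorrt as trt
import torch

# `engine` (trt.ICudaEngine) and `buffers` (name -> torch.Tensor on GPU) are assumed to exist.
context = engine.create_execution_context()
for idx in range(engine.num_io_tensors):
    name = engine.get_tensor_name(idx)
    if engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
        context.set_input_shape(name, tuple(buffers[name].shape))
    context.set_tensor_address(name, buffers[name].data_ptr())

context.execute_async_v3(torch.cuda.current_stream().cuda_stream)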

‎py/torch_tensorrt/fx/converters/acc_ops_converters.py

Lines changed: 12 additions & 15 deletions
@@ -3,30 +3,27 @@
 import math
 import operator
 import warnings
-from typing import cast, Dict, Optional, Sequence, Tuple, Union
+from typing import Dict, Optional, Sequence, Tuple, Union, cast

 import numpy as np

 # @manual=//deeplearning/trt/python:py_tensorrt
 import tensorrt as trt
 import torch
-
-from ..converter_registry import tensorrt_converter
-
-from ..tracer.acc_tracer import acc_ops
-from ..types import *  # noqa: F403
 from torch.fx.immutable_collections import immutable_list
 from torch.fx.node import Argument, Target
-
-from ..utils import get_dynamic_dims, unified_dtype_converter, Frameworks
-
-from .converter_utils import *  # noqa: F403
+from torch_tensorrt.fx.converters.impl import activation, convolution
 from torch_tensorrt.fx.passes.lower_basic_pass import (
     trt_transposed_linear,
     trt_transposed_matmul,
 )
 from torch_tensorrt.fx.tracer.acc_tracer.acc_ops import contiguous
-from torch_tensorrt.fx.converters.impl import activation, convolution
+
+from ..converter_registry import tensorrt_converter
+from ..tracer.acc_tracer import acc_ops
+from ..types import *  # noqa: F403
+from ..utils import Frameworks, get_dynamic_dims, unified_dtype_converter
+from .converter_utils import *  # noqa: F403

 _LOGGER: logging.Logger = logging.getLogger(__name__)

@@ -323,7 +320,7 @@ def acc_ops_pad_with_slice_layer(
     )

     layer.set_input(4, value_const)
-    layer.mode = trt.SliceMode.FILL
+    layer.mode = trt.SampleMode.FILL
     set_layer_name(layer, target, name)

     return layer.get_output(0)
@@ -840,7 +837,7 @@ def acc_ops_tile(
     shapes = [1] * len(dims)
     strides = [1] * len(dims)
     layer = network.add_slice(input_val, starts, shapes, strides)
-    layer.mode = trt.SliceMode.WRAP
+    layer.mode = trt.SampleMode.WRAP
     set_layer_name(layer, target, name)

     if has_dynamic_shape(input_val.shape):  # type: ignore[union-attr]
@@ -3536,9 +3533,9 @@ def acc_ops_interpolate(
         layer.scales = [1, 1] + list(scale_factor)

     if mode.lower() in ["linear", "bilinear", "trilinear"]:
-        layer.resize_mode = trt.ResizeMode.LINEAR
+        layer.resize_mode = trt.InterpolationMode.LINEAR
     else:
-        layer.resize_mode = trt.ResizeMode.NEAREST
+        layer.resize_mode = trt.InterpolationMode.NEAREST

     if (align_corners is not None) and align_corners:
         layer.coordinate_transformation = (

‎py/torch_tensorrt/fx/utils.py

Lines changed: 13 additions & 5 deletions
@@ -1,18 +1,21 @@
 from enum import Enum
-from typing import Dict, List, Optional, Callable, Union
+from typing import Callable, Dict, List, Optional, Union
+
 import numpy as np
-from packaging import version

 # @manual=//deeplearning/trt/python:py_tensorrt
 import tensorrt as trt
 import torch
 from functorch import make_fx
 from functorch.experimental import functionalize
+from torch_tensorrt._utils import sanitized_torch_version
 from torch_tensorrt.fx.passes.lower_basic_pass import (
     replace_op_with_indices,
     run_const_fold,
 )
-from torch_tensorrt._utils import sanitized_torch_version
+
+from packaging import version
+
 from .types import Shape, TRTDataType

@@ -45,6 +48,11 @@ class Frameworks(Enum):
         Frameworks.TORCH: torch.float32,
         Frameworks.TRT: trt.float32,
     },
+    trt.bool: {
+        Frameworks.NUMPY: bool,
+        Frameworks.TORCH: torch.bool,
+        Frameworks.TRT: trt.bool,
+    },
 }

 if trt.__version__ >= "7.0":
@@ -89,10 +97,10 @@ def unified_dtype_converter(
         The equivalent data type in the requested framework.
     """
     assert to in Frameworks, f"Expected valid Framework for translation, got {to}"
-
+    trt_major_version = int(trt.__version__.split(".")[0])
     if dtype in (np.int8, torch.int8, trt.int8):
         return DataTypeEquivalence[trt.int8][to]
-    elif trt.__version__ >= "7.0" and dtype in (np.bool_, torch.bool, trt.bool):
+    elif trt_major_version >= 7 and dtype in (np.bool_, torch.bool, trt.bool):
         return DataTypeEquivalence[trt.bool][to]
     elif dtype in (np.int32, torch.int32, trt.int32):
         return DataTypeEquivalence[trt.int32][to]
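Besides the bool entry, this hunk fixes a subtle versioning bug: string comparison is lexicographic, so "10.0.1" >= "7.0" is False, and the old trt.__version__ >= "7.0" guard would have started failing exactly when TRT 10 arrived. Comparing the parsed major version (or using packaging.version, which the file already imports) avoids that:

from packaging import version

trt_version = "10.0.1"  # example value of trt.__version__
print(trt_version >= "7.0")                                 # False: lexicographic string comparison
print(int(trt_version.split(".")[0]) >= 7)                  # True: integer major-version comparison
print(version.parse(trt_version) >= version.parse("7.0"))   # True as well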

‎tests/py/dynamo/runtime/test_convert_method_to_trt_engine.py

Lines changed: 3 additions & 5 deletions
@@ -25,12 +25,10 @@ def forward(self, a, b):
             symbolic_traced_gm, "forward", inputs=[input_data_0, input_data_1]
         )

-        # Deserialize the TensorRT engine
-        with trt.Logger() as logger, trt.Runtime(logger) as runtime:
-            engine = runtime.deserialize_cuda_engine(trt_engine_str)
-
         # Inference on TRT Engine
-        py_trt_module = PythonTorchTensorRTModule(engine, ["a", "b"], ["output0"])
+        py_trt_module = PythonTorchTensorRTModule(
+            trt_engine_str, ["a", "b"], ["output0"]
+        )
         trt_output = py_trt_module(input_data_0, input_data_1).cpu()

         # Inference on PyTorch model
