fix: Repair argument passing in both Dynamo paths #1997

Status: Merged (1 commit, Jun 28, 2023)
2 changes: 2 additions & 0 deletions py/torch_tensorrt/dynamo/backend/__init__.py

@@ -45,6 +45,7 @@ def compile(
     min_block_size=MIN_BLOCK_SIZE,
     torch_executed_ops=[],
     torch_executed_modules=[],
+    pass_through_build_failures=PASS_THROUGH_BUILD_FAILURES,
     **kwargs,
 ):
     if debug:
@@ -86,6 +87,7 @@ def compile(
         workspace_size=workspace_size,
         min_block_size=min_block_size,
         torch_executed_ops=torch_executed_ops,
+        pass_through_build_failures=pass_through_build_failures,
         **kwargs,
     )
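Reviewer note: the point of this hunk is that `pass_through_build_failures` is now a named parameter of `compile` and is forwarded explicitly, instead of riding along (or getting lost) in `**kwargs`. A minimal sketch of the repaired pattern, with a hypothetical inner settings function standing in for the real call chain:

```python
PASS_THROUGH_BUILD_FAILURES = False  # assumed module-level default, mirroring the diff


def _settings(*, pass_through_build_failures, **kwargs):
    # Stand-in for the backend settings constructor.
    return {"pass_through_build_failures": pass_through_build_failures, **kwargs}


def compile(pass_through_build_failures=PASS_THROUGH_BUILD_FAILURES, **kwargs):
    # Naming the parameter surfaces the default in the signature and
    # guarantees it is forwarded, rather than relying on **kwargs plumbing.
    return _settings(pass_through_build_failures=pass_through_build_failures, **kwargs)


assert compile()["pass_through_build_failures"] is False
assert compile(pass_through_build_failures=True)["pass_through_build_failures"] is True
```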
1 change: 0 additions & 1 deletion py/torch_tensorrt/dynamo/backend/conversion.py

@@ -36,7 +36,6 @@ def convert_module(
     interpreter = TRTInterpreter(
         module,
         InputTensorSpec.from_tensors(inputs),
-        explicit_batch_dimension=True,
         logger_level=(trt.Logger.VERBOSE if settings.debug else trt.Logger.WARNING),
         output_dtypes=output_dtypes,
     )
16 changes: 4 additions & 12 deletions py/torch_tensorrt/dynamo/fx_ts_compat/fx2trt.py

@@ -38,8 +38,6 @@ def __init__(
         self,
         module: torch.fx.GraphModule,
         input_specs: List[InputTensorSpec],
-        explicit_batch_dimension: bool = True,
-        explicit_precision: bool = False,
         logger_level=None,
         output_dtypes=None,
     ):
@@ -49,17 +47,11 @@ def __init__(
         self.builder = trt.Builder(self.logger)

         flag = 0
-        if explicit_batch_dimension:
-            EXPLICIT_BATCH = 1 << (int)(
-                trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH
-            )
-            flag |= EXPLICIT_BATCH
-
-        if explicit_precision:
-            EXPLICIT_PRECISION = 1 << (int)(
-                trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION
-            )
-            flag |= EXPLICIT_PRECISION
+        # It is deprecated to not use this flag
+        EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+        flag |= EXPLICIT_BATCH

         self.network = self.builder.create_network(flag)

         missing_ops = self.validate_conversion()
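For context: implicit batch mode is deprecated in recent TensorRT releases, so the interpreter now unconditionally builds explicit-batch networks and the two toggles go away. A standalone sketch of the builder calls involved (requires a working `tensorrt` install; flag arithmetic as in the diff):

```python
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)

# NetworkDefinitionCreationFlag members are bit positions, so a creation
# flag is enabled by shifting 1 into place and OR-ing it into the mask.
flag = 0
flag |= 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

network = builder.create_network(flag)
print(network.has_implicit_batch_dimension)  # False: batch dim is explicit
```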
18 changes: 10 additions & 8 deletions py/torch_tensorrt/dynamo/fx_ts_compat/lower.py

@@ -49,6 +49,9 @@ def compile(
     cuda_graph_batch_size=-1,
     is_aten=False,
     use_experimental_fx_rt=False,
+    max_aux_streams=None,
+    version_compatible=False,
+    optimization_level=None,
     num_avg_timing_iters=1,
     torch_executed_ops=[],
     torch_executed_modules=[],
@@ -68,14 +71,12 @@ def compile(
         save_timing_cache: Update timing cache with current timing cache data if set to True.
         cuda_graph_batch_size: Cuda graph batch size, default to be -1.
         use_experimental_fx_rt: Uses the next generation TRTModule which supports both Python and TorchScript based execution (including in C++).
+        max_aux_streams: max number of aux streams to use
+        version_compatible: enable the version compatible feature
+        optimization_level: builder optimization level
     Returns:
         A torch.nn.Module lowered by TensorRT.
     """
-    if use_experimental_fx_rt and not explicit_batch_dimension:
-        raise ValueError(
-            "The experimental unified runtime only supports explicit batch. Please make sure to set explicit_batch_dimension=True when use_experimental_fx_rt=True"
-        )
-
     logger.warn(
         "For ir=fx_ts_compat backend only the "
         + "following arguments are supported: "
@@ -123,6 +124,9 @@ def compile(
         cuda_graph_batch_size=cuda_graph_batch_size,
         is_aten=is_aten,
         use_experimental_rt=use_experimental_fx_rt,
+        max_aux_streams=max_aux_streams,
+        version_compatible=version_compatible,
+        optimization_level=optimization_level,
     )
     lowerer = Lowerer.create(lower_setting=lower_setting)
     return lowerer(module, inputs)
@@ -162,8 +166,6 @@ def __call__(self, mod, input, split_name) -> TRTInterpreterResult:
         interpreter = TRTInterpreter(
             mod,
             input_specs=self.lower_setting.input_specs,
-            explicit_batch_dimension=self.lower_setting.explicit_batch_dimension,
-            explicit_precision=self.lower_setting.explicit_precision,
             logger_level=trt.Logger.VERBOSE
             if self.lower_setting.debug
             else trt.Logger.WARNING,
@@ -198,7 +200,7 @@ def default_split_function(
     model: fx.GraphModule, inputs: Input, lower_setting: LowerSetting
 ) -> SplitResult:
     splitter_setting = TRTSplitterSetting()
-    splitter_setting.use_implicit_batch_dim = not lower_setting.explicit_batch_dimension
+    splitter_setting.use_implicit_batch_dim = False
     splitter_setting.min_block_size = lower_setting.min_block_size
     splitter_setting.use_experimental_rt = lower_setting.use_experimental_rt
     splitter = TRTSplitter(model, inputs, settings=splitter_setting)
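The three new `compile` arguments correspond to TensorRT 8.6 builder-config options. A rough sketch of how such values are typically applied to an `IBuilderConfig` (an illustrative helper, not code from this PR):

```python
import tensorrt as trt


def apply_build_options(builder, max_aux_streams=None,
                        version_compatible=False, optimization_level=None):
    config = builder.create_builder_config()
    if max_aux_streams is not None:
        # Upper bound on auxiliary CUDA streams TensorRT may use in parallel.
        config.max_aux_streams = max_aux_streams
    if version_compatible:
        # Build an engine that later TensorRT versions can still deserialize.
        config.set_flag(trt.BuilderFlag.VERSION_COMPATIBLE)
    if optimization_level is not None:
        # 0 = fastest build, 5 = most thorough optimization.
        config.builder_optimization_level = optimization_level
    return config
```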
3 changes: 0 additions & 3 deletions py/torch_tensorrt/dynamo/fx_ts_compat/lower_setting.py

@@ -44,7 +44,6 @@ class LowerSetting(LowerSettingBasic):
     Args:
         input_specs: Specs for inputs to engine, can either be a single size or a
             range defined by Min, Optimal, Max sizes.
-        explicit_precision: Use explicit precision during lowering.
         workspace_size: The maximum workspace size. The maximum GPU temporary
             memory which the TensorRT engine can use at execution time.
         strict_type_constraints: Require TensorRT engine to strictly follow data type
@@ -76,8 +75,6 @@ class LowerSetting(LowerSettingBasic):
     """

     input_specs: List[InputTensorSpec] = dc.field(default_factory=list)
-    explicit_batch_dimension: bool = True
-    explicit_precision: bool = False
     workspace_size: int = 0
     strict_type_constraints: bool = False
     customized_fuse_pass: PassManager = dc.field(
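One consequence worth noting: since `LowerSetting` is a dataclass, any caller still constructing it with the deleted fields now fails loudly instead of silently carrying dead state. A minimal sketch of that behavior, trimmed to the fields visible in the diff:

```python
import dataclasses as dc
from typing import Any, List


@dc.dataclass
class LowerSetting:
    input_specs: List[Any] = dc.field(default_factory=list)
    workspace_size: int = 0
    strict_type_constraints: bool = False


LowerSetting(workspace_size=1 << 30)           # fine
# LowerSetting(explicit_batch_dimension=True)  # TypeError: unexpected keyword argument
```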