Skip to content

Commit 9e2502a

Browse files
committed
fix: Add truncate_long_and_double to Dynamo
- Add pass-through ability for feature to Dynamo compile frontend
1 parent 1a2fe99 commit 9e2502a

File tree

4 files changed

+18
-13
lines changed

4 files changed

+18
-13
lines changed

py/torch_tensorrt/dynamo/backend/__init__.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
DEBUG,
1717
MAX_WORKSPACE_SIZE,
1818
MIN_BLOCK_SIZE,
19+
TRUNCATE_LONG_AND_DOUBLE,
1920
)
2021

2122

@@ -39,7 +40,7 @@ def compile(
3940
dla_local_dram_size=1073741824,
4041
dla_global_dram_size=536870912,
4142
calibrator=None,
42-
truncate_long_and_double=False,
43+
truncate_long_and_double=TRUNCATE_LONG_AND_DOUBLE,
4344
require_full_compilation=False,
4445
min_block_size=MIN_BLOCK_SIZE,
4546
torch_executed_ops=[],
@@ -50,7 +51,8 @@ def compile(
5051
logger.warn(
5152
"The Dynamo backend is an experimental feature, for which only the "
5253
+ "following arguments are supported: "
53-
+ "{enabled_precisions, debug, workspace_size, min_block_size, torch_executed_ops}"
54+
+ "{enabled_precisions, debug, workspace_size, "
55+
+ "truncate_long_and_double, min_block_size, torch_executed_ops}"
5456
)
5557

5658
if not isinstance(inputs, collections.abc.Sequence):
@@ -82,6 +84,7 @@ def compile(
8284
workspace_size=workspace_size,
8385
min_block_size=min_block_size,
8486
torch_executed_ops=torch_executed_ops,
87+
truncate_long_and_double=truncate_long_and_double,
8588
**kwargs,
8689
)
8790

@@ -104,6 +107,7 @@ def create_backend(
104107
workspace_size: int = MAX_WORKSPACE_SIZE,
105108
min_block_size: int = MIN_BLOCK_SIZE,
106109
torch_executed_ops: Sequence[str] = set(),
110+
truncate_long_and_double: bool = TRUNCATE_LONG_AND_DOUBLE,
107111
**kwargs,
108112
):
109113
"""Create torch.compile backend given specified arguments
@@ -122,6 +126,7 @@ def create_backend(
122126
workspace_size=workspace_size,
123127
min_block_size=min_block_size,
124128
torch_executed_ops=torch_executed_ops,
129+
truncate_long_and_double=truncate_long_and_double,
125130
)
126131

127132
return partial(

py/torch_tensorrt/dynamo/backend/_defaults.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@
55
DEBUG = False
66
MAX_WORKSPACE_SIZE = 20 << 30
77
MIN_BLOCK_SIZE = 5
8+
TRUNCATE_LONG_AND_DOUBLE = False

py/torch_tensorrt/dynamo/backend/_settings.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
DEBUG,
88
MAX_WORKSPACE_SIZE,
99
MIN_BLOCK_SIZE,
10+
TRUNCATE_LONG_AND_DOUBLE,
1011
)
1112

1213

@@ -17,3 +18,4 @@ class CompilationSettings:
1718
workspace_size: int = MAX_WORKSPACE_SIZE
1819
min_block_size: int = MIN_BLOCK_SIZE
1920
torch_executed_ops: Sequence[str] = field(default_factory=set)
21+
truncate_long_and_double: bool = TRUNCATE_LONG_AND_DOUBLE

py/torch_tensorrt/dynamo/backend/conversion.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,45 +2,42 @@
22
import torch
33
from torch_tensorrt.fx.trt_module import TRTModule
44
from torch_tensorrt import TRTModuleNext
5+
from torch_tensorrt.dynamo.backend._settings import CompilationSettings
56
from torch_tensorrt.fx.fx2trt import (
67
InputTensorSpec,
78
TRTInterpreter,
89
)
9-
from torch_tensorrt.fx.utils import LowerPrecision
1010

1111
import tensorrt as trt
1212

1313

1414
def convert_module(
1515
module: torch.fx.GraphModule,
1616
inputs: Sequence[torch.Tensor],
17-
debug: bool = False,
18-
workspace_size: int = 20 << 30,
19-
precision: LowerPrecision = LowerPrecision.FP32,
17+
settings: CompilationSettings = CompilationSettings(),
2018
) -> Union[TRTModuleNext, TRTModule]:
2119
"""Convert an FX module to a TRT module
2220
Args:
2321
module: FX GraphModule to convert
2422
inputs: Sequence of Tensors representing inputs to the module
25-
debug: Whether to print out verbose debugging information
26-
workspace_size: Maximum workspace TRT is allowed to use for the module
27-
precision: Model Layer precision
23+
settings: Compilation settings
2824
Returns:
2925
TRTModule or TRTModuleNext
3026
"""
3127
interp = TRTInterpreter(
3228
module,
3329
InputTensorSpec.from_tensors(inputs),
3430
explicit_batch_dimension=True,
35-
logger_level=(trt.Logger.VERBOSE if debug else trt.Logger.WARNING),
31+
logger_level=(trt.Logger.VERBOSE if settings.debug else trt.Logger.WARNING),
32+
truncate_long_and_double=settings.truncate_long_and_double,
3633
)
3734

3835
r = interp.run(
39-
max_workspace_size=workspace_size,
40-
lower_precision=precision,
36+
max_workspace_size=settings.workspace_size,
37+
lower_precision=settings.precision,
4138
profiling_verbosity=(
4239
trt.ProfilingVerbosity.VERBOSE
43-
if debug
40+
if settings.debug
4441
else trt.ProfilingVerbosity.LAYER_NAMES_ONLY
4542
),
4643
)

0 commit comments

Comments (0)