fix: Add support for truncate_long_and_double in FX

gs-olive · gs-olive · commit 65bc3600abba · 2023-04-28T15:30:54.000-07:00
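- Add support and testing for `double` (float64) type inputs, which are cast to float32 when `truncate_long_and_double=True`
- Int64 inputs are now cast to Int32 only when `truncate_long_and_double=True`; with the default (`False`) an error is raised instead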
diff --git a/py/torch_tensorrt/fx/fx2trt.py b/py/torch_tensorrt/fx/fx2trt.py
@@ -40,6 +40,7 @@ def __init__(
         explicit_batch_dimension: bool = False,
         explicit_precision: bool = False,
         logger_level=None,
+        truncate_long_and_double=False,
     ):
         super().__init__(module)
 
@@ -69,6 +70,7 @@ def __init__(
 
         self.optimization_profiles: Optional[List] = None
         self.input_specs = input_specs
+        self.truncate_long_and_double = truncate_long_and_double
         self.input_specs_iter = 0
         self.validate_input_specs()
         self._cur_node_name: Optional[str] = None
@@ -300,7 +302,9 @@ def placeholder(self, target, args, kwargs):
                 self.optimization_profiles[i].set_shape(target, *shape_range)
 
         return self.network.add_input(
-            name=target, shape=tuple(shape), dtype=torch_dtype_to_trt(dtype)
+            name=target,
+            shape=tuple(shape),
+            dtype=torch_dtype_to_trt(dtype, self.truncate_long_and_double),
         )
 
     def call_module(self, target, args, kwargs):
diff --git a/py/torch_tensorrt/fx/lower.py b/py/torch_tensorrt/fx/lower.py
@@ -41,6 +41,7 @@ def compile(
     dynamic_batch=True,
     is_aten=False,
     use_experimental_fx_rt=False,
+    truncate_long_and_double=False,
 ) -> nn.Module:
     """
     Takes in original module, input and lowering setting, run lowering workflow to turn module
@@ -60,6 +61,7 @@ def compile(
         cuda_graph_batch_size: Cuda graph batch size, default to be -1.
         dynamic_batch: batch dimension (dim=0) is dynamic.
         use_experimental_fx_rt: Uses the next generation TRTModule which supports both Python and TorchScript based execution (including in C++).
+        truncate_long_and_double: Whether to automatically truncate long and double-type tensor inputs to TRT Engines
     Returns:
         A torch.nn.Module lowered by TensorRT.
     """
@@ -81,6 +83,7 @@ def compile(
         dynamic_batch=dynamic_batch,
         is_aten=is_aten,
         use_experimental_rt=use_experimental_fx_rt,
+        truncate_long_and_double=truncate_long_and_double,
     )
     lowerer = Lowerer.create(lower_setting=lower_setting)
     return lowerer(module, input)
@@ -125,6 +128,7 @@ def __call__(self, mod, input, split_name) -> TRTInterpreterResult:
             logger_level=trt.Logger.VERBOSE
             if self.lower_setting.verbose_log
             else trt.Logger.WARNING,
+            truncate_long_and_double=self.lower_setting.truncate_long_and_double,
         )
 
         interp_result: TRTInterpreterResult = interpreter.run(
diff --git a/py/torch_tensorrt/fx/lower_setting.py b/py/torch_tensorrt/fx/lower_setting.py
@@ -101,3 +101,4 @@ class LowerSetting(LowerSettingBasic):
     correctness_atol: float = 0.1
     correctness_rtol: float = 0.1
     use_experimental_rt: bool = False
+    truncate_long_and_double: bool = False
diff --git a/py/torch_tensorrt/fx/test/core/test_trt_module.py b/py/torch_tensorrt/fx/test/core/test_trt_module.py
@@ -199,5 +199,70 @@ def forward(self, x):
         )
 
 
+class TestTRTModuleFloat64Input(TestCase):
+    """Save/load round-trip tests for a TRTModule fed float64 inputs with truncation enabled."""
+
+    def test_save_and_load_trt_module(self):
+        class TestModule(torch.nn.Module):
+            def forward(self, x):
+                return x + x
+
+        inputs = [torch.randn(5, 5).double()]
+        mod = TestModule().eval()
+        ref_output = mod(*inputs)
+
+        mod = acc_tracer.trace(mod, inputs)
+        # float64 inputs are rejected by torch_dtype_to_trt unless truncation is enabled.
+        interp = TRTInterpreter(
+            mod,
+            input_specs=InputTensorSpec.from_tensors(inputs),
+            truncate_long_and_double=True,
+        )
+        trt_mod = TRTModule(*interp.run(lower_precision=LowerPrecision.FP32))
+        torch.save(trt_mod, "trt.pt")
+        reload_trt_mod = torch.load("trt.pt")
+
+        # The reference is float64 while the engine input is float32, so dtypes are not compared.
+        torch.testing.assert_close(
+            reload_trt_mod(inputs[0].cuda()).cpu(),
+            ref_output,
+            rtol=1e-04,
+            atol=1e-04,
+            check_dtype=False,
+        )
+        os.remove(f"{os.getcwd()}/trt.pt")
+
+    def test_save_and_load_state_dict(self):
+        class TestModule(torch.nn.Module):
+            def forward(self, x):
+                return x + x
+
+        inputs = [torch.randn(5, 5).double()]
+        mod = TestModule().eval()
+        ref_output = mod(*inputs)
+
+        mod = acc_tracer.trace(mod, inputs)
+        # float64 inputs are rejected by torch_dtype_to_trt unless truncation is enabled.
+        interp = TRTInterpreter(
+            mod,
+            input_specs=InputTensorSpec.from_tensors(inputs),
+            truncate_long_and_double=True,
+        )
+        trt_mod = TRTModule(*interp.run(lower_precision=LowerPrecision.FP32))
+        st = trt_mod.state_dict()
+
+        new_trt_mod = TRTModule()
+        new_trt_mod.load_state_dict(st)
+
+        # The reference is float64 while the engine input is float32, so dtypes are not compared.
+        torch.testing.assert_close(
+            new_trt_mod(inputs[0].cuda()).cpu(),
+            ref_output,
+            rtol=1e-04,
+            atol=1e-04,
+            check_dtype=False,
+        )
+
+
 if __name__ == "__main__":
     run_tests()
diff --git a/py/torch_tensorrt/fx/trt_module.py b/py/torch_tensorrt/fx/trt_module.py
@@ -156,6 +156,15 @@ def forward(self, *inputs):
                         inputs = (
                             inputs[:i] + (inputs[i].to(torch.int32),) + inputs[i + 1 :]
                         )
+                    elif (
+                        inputs[i].dtype == torch.float64
+                        and self.input_dtypes[i] == torch.float32
+                    ):
+                        inputs = (
+                            inputs[:i]
+                            + (inputs[i].to(torch.float32),)
+                            + inputs[i + 1 :]
+                        )
 
                     assert (
                         inputs[i].dtype == self.input_dtypes[i]
diff --git a/py/torch_tensorrt/fx/utils.py b/py/torch_tensorrt/fx/utils.py
@@ -25,7 +25,9 @@ class LowerPrecision(Enum):
     INT8 = "int8"
 
 
-def torch_dtype_to_trt(dtype: torch.dtype) -> TRTDataType:
+def torch_dtype_to_trt(
+    dtype: torch.dtype, truncate_long_and_double: bool = False
+) -> TRTDataType:
     """
     Convert PyTorch data types to TensorRT data types.
 
@@ -42,14 +44,33 @@ def torch_dtype_to_trt(dtype: torch.dtype) -> TRTDataType:
     elif dtype == torch.int32:
         return trt.int32
     elif dtype == torch.int64:
-        _LOGGER.warn(
-            "Detected Int64 Input, Casting to Int32 for TRT Engine Compatibility"
-        )
-        return trt.int32
+        if truncate_long_and_double:
+            _LOGGER.warning(
+                "Detected Int64 Input, Casting to Int32 for TRT Engine Compatibility"
+            )
+            return trt.int32
+        else:
+            # Narrowing is opt-in: without the flag, int64 inputs are rejected.
+            raise AssertionError(
+                "Detected Int64 Input, enable truncate_long_and_double=True to cast "
+                + "input to Int32 for TRT Engine"
+            )
     elif dtype == torch.float16:
         return trt.float16
     elif dtype == torch.float32:
         return trt.float32
+    elif dtype == torch.float64:
+        if truncate_long_and_double:
+            _LOGGER.warning(
+                "Detected Float64 Input, Casting to Float32 for TRT Engine Compatibility"
+            )
+            return trt.float32
+        else:
+            # Narrowing is opt-in: without the flag, float64 inputs are rejected.
+            raise AssertionError(
+                "Detected Float64 Input, enable truncate_long_and_double=True to cast "
+                + "input to Float32 for TRT Engine"
+            )
     else:
         raise TypeError("%s is not supported by tensorrt" % dtype)