From 4cf7ea6db44330e8ff0202207ec824e75a313f02 Mon Sep 17 00:00:00 2001
From: Wei Wei
Date: Tue, 14 Jun 2022 17:53:41 -0700
Subject: [PATCH] Changes done internally at Facebook

364639a8ab2ee7531ce5259b8985a3c90bda4fdf Wei Wei [fx2trt] target files added
07d8e842b54b9c727f4215239f6c007cc7a62c9f Wei Wei Swap fx2trt_oss to torch_tensorrt
74731c90fd63e41ff5997887d8f72ca0b805cf8d Yinghai Lu Fix uru_10x10 test
6c53d36a08a7d465a1108d7154ef29a373eb38cc Wei Wei [fx2trt] Modify lower setting class to accommandate AIT lowering
6f873f4f3ece9d476479eb7c9633d38554dd8692 Oleg Khabinov [fx2trt] Make sure acc_tracer belongs only to single target
529a5750ace2bede6e9b7a9922a0f75c459df16b Shirong Wu Enable explicit batch dim for MTS gpu benchmark
2d284df94ddb530f3a8875fdc76796fad508ec29 Wei Wei [fx2trt] remove wildcard for obj of torch_fx2trt in TARGETS
84b53b15427cc08fb1e36143b6bdec4557f50d7e Shirong Wu Add var converter
17e309b17b3ba66cda0e7d5712089d860a5e125e Jordan Fix [const_fold] Set requires_grad based on the folded tensor; add device_for_folding option
2c8f1b23be30ec968ad27215256d250c872616b0 Kefei Lu lowering: support creating lowerer instance with "presets"
50fa26d1b56888ec25eb839d4813bc695be20da9 wwei6 [fx2trt] target files added
6e7f9b6c4f8afa32383c457e8133674640348810 wwei6 fx2trt_oss change set1
f3ee8a4b482a35edc2786cd97bc0d07e9af6a23e wwei6 Automatic update of fbcode/deeplearning/trt/torch_tensorrt to 666a2637d23f4fea70fef6b5b03f39f8a40146bc
---
 py/torch_tensorrt/fx/lower.py                     | 13 ++++++++++++-
 py/torch_tensorrt/fx/lower_setting.py             |  7 +++++++
 py/torch_tensorrt/fx/passes/lower_basic_pass.py   |  4 +++-
 .../fx/test/tracer/test_acc_tracer.py             |  1 +
 py/torch_tensorrt/fx/tracer/acc_tracer/acc_ops.py | 15 +++++++++++++++
 5 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/py/torch_tensorrt/fx/lower.py b/py/torch_tensorrt/fx/lower.py
index 601408fceb..318136be56 100644
--- a/py/torch_tensorrt/fx/lower.py
+++ b/py/torch_tensorrt/fx/lower.py
@@ -92,7 +92,18 @@ def __call__(self, mod, input, split_name) -> TRTInterpreterResult:
         input_specs_val = (
             self.lower_setting.input_specs
             if self.lower_setting.input_specs
-            else InputTensorSpec.from_tensors(input)
+            else (
+                InputTensorSpec.from_tensors_with_dynamic_batch_size(
+                    input,
+                    (
+                        0,
+                        self.lower_setting.max_batch_size,
+                        self.lower_setting.max_batch_size,
+                    ),
+                )
+                if self.lower_setting.explicit_batch_dimension
+                else InputTensorSpec.from_tensors(input)
+            )
         )
 
         # Prepare algorithm selector and timing_cache for TRTInterpreter
diff --git a/py/torch_tensorrt/fx/lower_setting.py b/py/torch_tensorrt/fx/lower_setting.py
index b9cbb2630d..b1a32c2cff 100644
--- a/py/torch_tensorrt/fx/lower_setting.py
+++ b/py/torch_tensorrt/fx/lower_setting.py
@@ -63,6 +63,12 @@ class LowerSetting(LowerSettingBasic):
     save_timing_cache: Save updated timing cache data into timing cache file if the
     timing cache file is provided.
     cuda_graph_batch_size (int): Cuda graph batch size, default to be -1.
+    preset_lowerer (str): when specified, use a preset logic to build the
+    instance of Lowerer. Refer to
+    `caffe2.torch.fb.model_transform.fx2trt.presets.LowererPresetsManager` on
+    how presets are applied. Refer to
+    `caffe2.torch.fb.model_transform.fx2trt.presets.ESUHMLowererPreset` on how
+    to add a preset.
""" input_specs: List[InputTensorSpec] = dc.field(default_factory=list) @@ -79,3 +85,4 @@ class LowerSetting(LowerSettingBasic): timing_cache_prefix: str = "" save_timing_cache: bool = False cuda_graph_batch_size: int = -1 + preset_lowerer: str = "" diff --git a/py/torch_tensorrt/fx/passes/lower_basic_pass.py b/py/torch_tensorrt/fx/passes/lower_basic_pass.py index 6dc2e86f22..4394ca97b4 100644 --- a/py/torch_tensorrt/fx/passes/lower_basic_pass.py +++ b/py/torch_tensorrt/fx/passes/lower_basic_pass.py @@ -31,7 +31,9 @@ def skip_folding_quant_dequant(node: torch.fx.Node): return True return False - const_split_mod = split_const_subgraphs(traced_mod, skip_folding_quant_dequant) + const_split_mod = split_const_subgraphs( + traced_mod, skip_folding_quant_dequant, device_for_folded_attrs="cuda" + ) const_split_mod.run_folding() return const_split_mod diff --git a/py/torch_tensorrt/fx/test/tracer/test_acc_tracer.py b/py/torch_tensorrt/fx/test/tracer/test_acc_tracer.py index 231b8eed0c..a78329c9ef 100644 --- a/py/torch_tensorrt/fx/test/tracer/test_acc_tracer.py +++ b/py/torch_tensorrt/fx/test/tracer/test_acc_tracer.py @@ -2576,5 +2576,6 @@ def test_all_acc_ops_registered(self): acc_ops.new_ones, acc_ops.einsum, acc_ops.as_strided, + acc_ops.var, }, ) diff --git a/py/torch_tensorrt/fx/tracer/acc_tracer/acc_ops.py b/py/torch_tensorrt/fx/tracer/acc_tracer/acc_ops.py index df6480166b..b28bf263c2 100644 --- a/py/torch_tensorrt/fx/tracer/acc_tracer/acc_ops.py +++ b/py/torch_tensorrt/fx/tracer/acc_tracer/acc_ops.py @@ -2864,3 +2864,18 @@ def as_strided(*, input, size, stride, storage_offset=0): return torch.as_strided( input=input, size=size, stride=stride, storage_offset=storage_offset ) + + +@register_acc_op_mapping(op_and_target=("call_function", torch.var)) +@register_acc_op_mapping( + op_and_target=("call_method", "var"), + arg_replacement_tuples=[ + ("input", "input"), + ("dim", "dim"), + ("unbiased", "unbiased"), + ("keepdim", "keepdim"), + ], +) +@register_acc_op +def var(*, input, dim, unbiased, keepdim=False): + return torch.var(input=input, dim=dim, unbiased=unbiased, keepdim=keepdim)