fix: Repair argument passing in both Dynamo paths #1997

Status: Merged (1 commit, Jun 28, 2023)
2 changes: 2 additions & 0 deletions py/torch_tensorrt/dynamo/backend/__init__.py

@@ -45,6 +45,7 @@ def compile(
     min_block_size=MIN_BLOCK_SIZE,
     torch_executed_ops=[],
     torch_executed_modules=[],
+    pass_through_build_failures=PASS_THROUGH_BUILD_FAILURES,
     **kwargs,
 ):
     if debug:
@@ -86,6 +87,7 @@ def compile(
         workspace_size=workspace_size,
         min_block_size=min_block_size,
         torch_executed_ops=torch_executed_ops,
+        pass_through_build_failures=pass_through_build_failures,
         **kwargs,
     )
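Reviewer note: the point of this hunk is that `pass_through_build_failures` is now a named parameter of `compile` and is forwarded explicitly, instead of riding along (or getting lost) in `**kwargs`. A minimal sketch of the repaired pattern, with a hypothetical inner settings function standing in for the real call chain:

```python
PASS_THROUGH_BUILD_FAILURES = False  # assumed module-level default, mirroring the diff


def _settings(*, pass_through_build_failures, **kwargs):
    # Stand-in for the backend settings constructor.
    return {"pass_through_build_failures": pass_through_build_failures, **kwargs}


def compile(pass_through_build_failures=PASS_THROUGH_BUILD_FAILURES, **kwargs):
    # Naming the parameter surfaces the default in the signature and
    # guarantees it is forwarded, rather than relying on **kwargs plumbing.
    return _settings(pass_through_build_failures=pass_through_build_failures, **kwargs)


assert compile()["pass_through_build_failures"] is False
assert compile(pass_through_build_failures=True)["pass_through_build_failures"] is True
```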
1 change: 0 additions & 1 deletion py/torch_tensorrt/dynamo/backend/conversion.py

@@ -36,7 +36,6 @@ def convert_module(
     interpreter = TRTInterpreter(
         module,
         InputTensorSpec.from_tensors(inputs),
-        explicit_batch_dimension=True,
         logger_level=(trt.Logger.VERBOSE if settings.debug else trt.Logger.WARNING),
         output_dtypes=output_dtypes,
     )
16 changes: 4 additions & 12 deletions py/torch_tensorrt/dynamo/fx_ts_compat/fx2trt.py

@@ -38,8 +38,6 @@ def __init__(
         self,
         module: torch.fx.GraphModule,
         input_specs: List[InputTensorSpec],
-        explicit_batch_dimension: bool = True,
-        explicit_precision: bool = False,
         logger_level=None,
         output_dtypes=None,
     ):
@@ -49,17 +47,11 @@ def __init__(
         self.builder = trt.Builder(self.logger)

         flag = 0
-        if explicit_batch_dimension:
-            EXPLICIT_BATCH = 1 << (int)(
-                trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH
-            )
-            flag |= EXPLICIT_BATCH
-
-        if explicit_precision:
-            EXPLICIT_PRECISION = 1 << (int)(
-                trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION
-            )
-            flag |= EXPLICIT_PRECISION
+        # It is deprecated to not use this flag
+        EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+        flag |= EXPLICIT_BATCH

         self.network = self.builder.create_network(flag)

         missing_ops = self.validate_conversion()
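For context: implicit batch mode is deprecated in recent TensorRT releases, so the interpreter now unconditionally builds explicit-batch networks and the two toggles go away. A standalone sketch of the builder calls involved (requires a working `tensorrt` install; flag arithmetic as in the diff):

```python
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)

# NetworkDefinitionCreationFlag members are bit positions, so a creation
# flag is enabled by shifting 1 into place and OR-ing it into the mask.
flag = 0
flag |= 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

network = builder.create_network(flag)
print(network.has_implicit_batch_dimension)  # False: batch dim is explicit
```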
18 changes: 10 additions & 8 deletions py/torch_tensorrt/dynamo/fx_ts_compat/lower.py

@@ -49,6 +49,9 @@ def compile(
     cuda_graph_batch_size=-1,
     is_aten=False,
     use_experimental_fx_rt=False,
+    max_aux_streams=None,
+    version_compatible=False,
+    optimization_level=None,
     num_avg_timing_iters=1,
     torch_executed_ops=[],
     torch_executed_modules=[],
@@ -68,14 +71,12 @@ def compile(
         save_timing_cache: Update timing cache with current timing cache data if set to True.
         cuda_graph_batch_size: Cuda graph batch size, default to be -1.
         use_experimental_fx_rt: Uses the next generation TRTModule which supports both Python and TorchScript based execution (including in C++).
+        max_aux_streams: max number of aux streams to use
+        version_compatible: enable the version compatible feature
+        optimization_level: builder optimization level
     Returns:
         A torch.nn.Module lowered by TensorRT.
     """
-    if use_experimental_fx_rt and not explicit_batch_dimension:
-        raise ValueError(
-            "The experimental unified runtime only supports explicit batch. Please make sure to set explicit_batch_dimension=True when use_experimental_fx_rt=True"
-        )
-
     logger.warn(
         "For ir=fx_ts_compat backend only the "
         + "following arguments are supported: "
@@ -123,6 +124,9 @@ def compile(
         cuda_graph_batch_size=cuda_graph_batch_size,
         is_aten=is_aten,
         use_experimental_rt=use_experimental_fx_rt,
+        max_aux_streams=max_aux_streams,
+        version_compatible=version_compatible,
+        optimization_level=optimization_level,
     )
     lowerer = Lowerer.create(lower_setting=lower_setting)
     return lowerer(module, inputs)
@@ -162,8 +166,6 @@ def __call__(self, mod, input, split_name) -> TRTInterpreterResult:
         interpreter = TRTInterpreter(
             mod,
             input_specs=self.lower_setting.input_specs,
-            explicit_batch_dimension=self.lower_setting.explicit_batch_dimension,
-            explicit_precision=self.lower_setting.explicit_precision,
             logger_level=trt.Logger.VERBOSE
             if self.lower_setting.debug
             else trt.Logger.WARNING,
@@ -198,7 +200,7 @@ def default_split_function(
     model: fx.GraphModule, inputs: Input, lower_setting: LowerSetting
 ) -> SplitResult:
     splitter_setting = TRTSplitterSetting()
-    splitter_setting.use_implicit_batch_dim = not lower_setting.explicit_batch_dimension
+    splitter_setting.use_implicit_batch_dim = False
     splitter_setting.min_block_size = lower_setting.min_block_size
     splitter_setting.use_experimental_rt = lower_setting.use_experimental_rt
     splitter = TRTSplitter(model, inputs, settings=splitter_setting)
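The three new `compile` arguments correspond to TensorRT 8.6 builder-config options. A rough sketch of how such values are typically applied to an `IBuilderConfig` (an illustrative helper, not code from this PR):

```python
import tensorrt as trt


def apply_build_options(builder, max_aux_streams=None,
                        version_compatible=False, optimization_level=None):
    config = builder.create_builder_config()
    if max_aux_streams is not None:
        # Upper bound on auxiliary CUDA streams TensorRT may use in parallel.
        config.max_aux_streams = max_aux_streams
    if version_compatible:
        # Build an engine that later TensorRT versions can still deserialize.
        config.set_flag(trt.BuilderFlag.VERSION_COMPATIBLE)
    if optimization_level is not None:
        # 0 = fastest build, 5 = most thorough optimization.
        config.builder_optimization_level = optimization_level
    return config
```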
3 changes: 0 additions & 3 deletions py/torch_tensorrt/dynamo/fx_ts_compat/lower_setting.py

@@ -44,7 +44,6 @@ class LowerSetting(LowerSettingBasic):
     Args:
         input_specs: Specs for inputs to engine, can either be a single size or a
             range defined by Min, Optimal, Max sizes.
-        explicit_precision: Use explicit precision during lowering.
         workspace_size: The maximum workspace size. The maximum GPU temporary
             memory which the TensorRT engine can use at execution time.
         strict_type_constraints: Require TensorRT engine to strictly follow data type
@@ -76,8 +75,6 @@ class LowerSetting(LowerSettingBasic):
     """

     input_specs: List[InputTensorSpec] = dc.field(default_factory=list)
-    explicit_batch_dimension: bool = True
-    explicit_precision: bool = False
     workspace_size: int = 0
     strict_type_constraints: bool = False
     customized_fuse_pass: PassManager = dc.field(
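One consequence worth noting: since `LowerSetting` is a dataclass, any caller still constructing it with the deleted fields now fails loudly instead of silently carrying dead state. A minimal sketch of that behavior, trimmed to the fields visible in the diff:

```python
import dataclasses as dc
from typing import Any, List


@dc.dataclass
class LowerSetting:
    input_specs: List[Any] = dc.field(default_factory=list)
    workspace_size: int = 0
    strict_type_constraints: bool = False


LowerSetting(workspace_size=1 << 30)           # fine
# LowerSetting(explicit_batch_dimension=True)  # TypeError: unexpected keyword argument
```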