4
4
import torch_tensorrt
5
5
from functools import partial
6
6
7
- from typing import Any , Sequence
7
+ from typing import Any , Optional , Sequence
8
8
from torch_tensorrt import EngineCapability , Device
9
9
from torch_tensorrt .fx .utils import LowerPrecision
10
10
17
17
WORKSPACE_SIZE ,
18
18
MIN_BLOCK_SIZE ,
19
19
PASS_THROUGH_BUILD_FAILURES ,
20
+ MAX_AUX_STREAMS ,
21
+ VERSION_COMPATIBLE ,
22
+ OPTIMIZATION_LEVEL ,
20
23
USE_EXPERIMENTAL_RT ,
21
24
)
22
25
@@ -46,6 +49,9 @@ def compile(
46
49
min_block_size = MIN_BLOCK_SIZE ,
47
50
torch_executed_ops = [],
48
51
torch_executed_modules = [],
52
+ max_aux_streams = MAX_AUX_STREAMS ,
53
+ version_compatible = VERSION_COMPATIBLE ,
54
+ optimization_level = OPTIMIZATION_LEVEL ,
49
55
use_experimental_rt = USE_EXPERIMENTAL_RT ,
50
56
** kwargs ,
51
57
):
@@ -95,6 +101,9 @@ def compile(
95
101
workspace_size = workspace_size ,
96
102
min_block_size = min_block_size ,
97
103
torch_executed_ops = torch_executed_ops ,
104
+ max_aux_streams = max_aux_streams ,
105
+ version_compatible = version_compatible ,
106
+ optimization_level = optimization_level ,
98
107
use_experimental_rt = use_experimental_rt ,
99
108
** kwargs ,
100
109
)
@@ -119,6 +128,9 @@ def create_backend(
119
128
min_block_size : int = MIN_BLOCK_SIZE ,
120
129
torch_executed_ops : Sequence [str ] = set (),
121
130
pass_through_build_failures : bool = PASS_THROUGH_BUILD_FAILURES ,
131
+ max_aux_streams : Optional [int ] = MAX_AUX_STREAMS ,
132
+ version_compatible : bool = VERSION_COMPATIBLE ,
133
+ optimization_level : Optional [int ] = OPTIMIZATION_LEVEL ,
122
134
use_experimental_rt : bool = USE_EXPERIMENTAL_RT ,
123
135
** kwargs ,
124
136
):
@@ -131,6 +143,10 @@ def create_backend(
131
143
min_block_size: Minimum number of operators per TRT-Engine Block
132
144
torch_executed_ops: Sequence of operations to run in Torch, regardless of converter coverage
133
145
pass_through_build_failures: Whether to fail on TRT engine build errors (True) or not (False)
146
+ max_aux_streams: Maximum number of allowed auxiliary TRT streams for each engine
147
+ version_compatible: Provide version forward-compatibility for engine plan files
148
+ optimization_level: Builder optimization level (0-5); higher levels search more optimization
149
+ options at the cost of longer build time. TRT defaults to 3
134
150
use_experimental_rt: Whether to use the new experimental TRTModuleNext for TRT engines
135
151
Returns:
136
152
Backend for torch.compile
@@ -145,6 +161,9 @@ def create_backend(
145
161
min_block_size = min_block_size ,
146
162
torch_executed_ops = torch_executed_ops ,
147
163
pass_through_build_failures = pass_through_build_failures ,
164
+ max_aux_streams = max_aux_streams ,
165
+ version_compatible = version_compatible ,
166
+ optimization_level = optimization_level ,
148
167
use_experimental_rt = use_experimental_rt ,
149
168
)
150
169
0 commit comments