@@ -20,7 +20,12 @@ def forward(self, input):
20
20
return torch .ops .aten .nonzero .default (input )
21
21
22
22
23
- class NonDDSModel (torch .nn .Module ):
23
+ class DDSOpWithReductionOpModel (torch .nn .Module ):
24
+ """
25
+ DDSOpWithReductionOpModel is a model that contains a DDS op (nonzero) followed by a reduction op (sum).
26
+ Since nonzero requires an output allocator, this model will use the output allocator by default.
27
+ """
28
+
24
29
def forward (self , inputs ):
25
30
out = torch .ops .aten .nonzero .default (inputs )
26
31
out = torch .ops .aten .sum .dim_IntList (out , 0 )
@@ -251,9 +256,9 @@ def test_combination_of_cg_and_oa(self, _, use_python_runtime):
251
256
out = cudagraphs_module (* inputs )
252
257
253
258
254
- class TestOutputAllocatorNonDDSModel (TestCase ):
259
+ class TestOutputAllocatorDDSOpWithReductionOpModel (TestCase ):
255
260
"""
256
- The NonDDSModel is a model that contains DDS op + reduction op.
261
+ The DDSOpWithReductionOpModel is a model that contains DDS op + reduction op.
257
262
"""
258
263
259
264
@parameterized .expand (
@@ -263,7 +268,7 @@ class TestOutputAllocatorNonDDSModel(TestCase):
263
268
]
264
269
)
265
270
def test_cudagraphs_and_output_allocator (self , _ , use_python_runtime ):
266
- model = NonDDSModel ().eval ().cuda ()
271
+ model = DDSOpWithReductionOpModel ().eval ().cuda ()
267
272
inputs = (torch .randint (low = 0 , high = 3 , size = (10 ,), dtype = torch .int ).to ("cuda" ),)
268
273
compiled_model = torch_tensorrt .compile (
269
274
model ,
@@ -302,9 +307,9 @@ def test_cudagraphs_and_output_allocator(self, _, use_python_runtime):
302
307
)
303
308
def test_default (self , _ , use_python_runtime ):
304
309
"""
305
- NonDDS models use standard execution with cudagraphs=False by default .
310
+ The DDSOpWithReductionOpModel is a model that contains a nonzero op followed by a reduction op; the nonzero op requires an output allocator.
306
311
"""
307
- model = NonDDSModel ().eval ().cuda ()
312
+ model = DDSOpWithReductionOpModel ().eval ().cuda ()
308
313
inputs = (torch .randint (low = 0 , high = 3 , size = (10 ,), dtype = torch .int ).to ("cuda" ),)
309
314
compiled_model = torch_tensorrt .compile (
310
315
model ,
@@ -313,11 +318,11 @@ def test_default(self, _, use_python_runtime):
313
318
min_block_size = 1 ,
314
319
use_python_runtime = use_python_runtime ,
315
320
)
316
- standard_out = compiled_model (* inputs )
321
+ oa_out = compiled_model (* inputs )
317
322
ref_out = model (* inputs )
318
323
319
324
self .assertAlmostEqual (
320
- float (torch .max (torch .abs (ref_out - standard_out ))),
325
+ float (torch .max (torch .abs (ref_out - oa_out ))),
321
326
0 ,
322
327
DECIMALS_OF_AGREEMENT ,
323
328
msg = "Default Output Allocator runtime outputs don't match with the original model." ,
@@ -330,7 +335,7 @@ def test_default(self, _, use_python_runtime):
330
335
]
331
336
)
332
337
def test_combination_of_cg_and_oa (self , _ , use_python_runtime ):
333
- model = NonDDSModel ().eval ().cuda ()
338
+ model = DDSOpWithReductionOpModel ().eval ().cuda ()
334
339
inputs = (torch .randint (low = 0 , high = 3 , size = (10 ,), dtype = torch .int ).to ("cuda" ),)
335
340
compiled_model = torch_tensorrt .compile (
336
341
model ,
0 commit comments