File tree Expand file tree Collapse file tree 2 files changed +2
-3
lines changed
tensorrt_llm/bench/benchmark
tests/integration/defs/accuracy Expand file tree Collapse file tree 2 files changed +2
-3
lines changed Original file line number Diff line number Diff line change @@ -450,6 +450,7 @@ def ignore_trt_only_args(kwargs: dict):
450
450
elif runtime_config .backend == "_autodeploy" :
451
451
ignore_trt_only_args (kwargs )
452
452
kwargs ["world_size" ] = kwargs .pop ("tensor_parallel_size" , None )
453
+
453
454
llm = AutoDeployLLM (** kwargs )
454
455
else :
455
456
llm = LLM (** kwargs )
Original file line number Diff line number Diff line change 17
17
18
18
from tensorrt_llm import LLM
19
19
from tensorrt_llm ._torch .auto_deploy import LLM as AutoDeployLLM
20
- from tensorrt_llm .llmapi .llm_args import (CapacitySchedulerPolicy ,
21
- ContextChunkingPolicy )
22
20
from tensorrt_llm .quantization import QuantAlgo
23
21
from tensorrt_llm .sampling_params import SamplingParams
24
22
@@ -40,7 +38,7 @@ def get_default_kwargs(self):
40
38
'max_batch_size' : 512 ,
41
39
# 131072 is the max seq len for the model
42
40
'max_seq_len' : 8192 ,
43
- # max num tokens is derived in the build_config, which is not used by AutoDeploy llmargs.
41
+ # max num tokens is derived in the build_config, which is not used by AutoDeploy llmargs.
44
42
# Set it explicitly here to 8192 which is the default in build_config.
45
43
'max_num_tokens' : 8192 ,
46
44
'skip_loading_weights' : False ,
You can’t perform that action at this time.
0 commit comments