Skip to content

Commit 58302a4

Browse files
committed
Remove unused import
Signed-off-by: ajrasane <[email protected]>
1 parent d46e040 commit 58302a4

File tree

2 files changed

+2
-3
lines changed

2 files changed

+2
-3
lines changed

tensorrt_llm/bench/benchmark/throughput.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -450,6 +450,7 @@ def ignore_trt_only_args(kwargs: dict):
450450
elif runtime_config.backend == "_autodeploy":
451451
ignore_trt_only_args(kwargs)
452452
kwargs["world_size"] = kwargs.pop("tensor_parallel_size", None)
453+
453454
llm = AutoDeployLLM(**kwargs)
454455
else:
455456
llm = LLM(**kwargs)

tests/integration/defs/accuracy/test_llm_api_autodeploy.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,6 @@
1717

1818
from tensorrt_llm import LLM
1919
from tensorrt_llm._torch.auto_deploy import LLM as AutoDeployLLM
20-
from tensorrt_llm.llmapi.llm_args import (CapacitySchedulerPolicy,
21-
ContextChunkingPolicy)
2220
from tensorrt_llm.quantization import QuantAlgo
2321
from tensorrt_llm.sampling_params import SamplingParams
2422

@@ -40,7 +38,7 @@ def get_default_kwargs(self):
4038
'max_batch_size': 512,
4139
# 131072 is the max seq len for the model
4240
'max_seq_len': 8192,
43-
# max num tokens is derived in the build_config, which is not used by AutoDeploy llmargs.
41+
# max num tokens is derived in the build_config, which is not used by AutoDeploy llmargs.
4442
# Set it explicitly here to 8192 which is the default in build_config.
4543
'max_num_tokens': 8192,
4644
'skip_loading_weights': False,

0 commit comments

Comments (0)