Commit c4dd3c1

Enlarge scheduler and slot manager capacity under disagg bs==1
Signed-off-by: Yifei Zhang <[email protected]>
1 parent: 7625845

File tree

1 file changed (+8, -0 lines)


tensorrt_llm/_torch/pyexecutor/_util.py

Lines changed: 8 additions & 0 deletions
@@ -512,6 +512,10 @@ def create_py_executor_instance(
             lora_config.trtllm_modules_to_hf_modules)

     max_num_sequences = executor_config.max_batch_size * mapping.pp_size
+    # When max_num_sequences == 1, the attention dp dummy request will prevent the scheduling of DISAGG_GENERATION_INIT.
+    # Enlarge the slot and scheduler capacity so that DISAGG_GENERATION_INIT does not get stuck in the scheduler.
+    if max_num_sequences == 1 and kv_cache_manager:
+        max_num_sequences += 1

     resources[ResourceManagerType.SEQ_SLOT_MANAGER] = SeqSlotManager(
         max_num_sequences)
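
For illustration, a minimal standalone sketch of the capacity rule this hunk introduces. The helper name and the boolean parameter are hypothetical; in the real code the guard reads kv_cache_manager directly:

    def effective_max_num_sequences(max_batch_size: int, pp_size: int,
                                    has_kv_cache_manager: bool) -> int:
        """Hypothetical helper mirroring the hunk above."""
        max_num_sequences = max_batch_size * pp_size
        # Under disaggregated serving with bs == 1, reserve one extra slot so
        # the attention dp dummy request cannot block DISAGG_GENERATION_INIT.
        if max_num_sequences == 1 and has_kv_cache_manager:
            max_num_sequences += 1
        return max_num_sequences

    # bs == 1, pp_size == 1, KV-cache manager present -> capacity bumped to 2.
    assert effective_max_num_sequences(1, 1, True) == 2
    # Larger configurations are left unchanged.
    assert effective_max_num_sequences(8, 2, True) == 16

The bump only fires in the degenerate bs == 1 case, so normal configurations keep their exact scheduler and slot-manager capacity.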
@@ -564,6 +568,10 @@ def create_py_executor_instance(
 def create_torch_sampler_args(executor_config: ExecutorConfig, mapping: Mapping,
                               *, max_seq_len: int, enable_mixed_sampler: bool):
     max_num_sequences = executor_config.max_batch_size * mapping.pp_size
+    # When max_num_sequences == 1, the attention dp dummy request will prevent the scheduling of DISAGG_GENERATION_INIT.
+    # Enlarge the sampler size to align with the slot and scheduler capacity.
+    if max_num_sequences == 1 and executor_config.kv_cache_config:
+        max_num_sequences += 1
     max_draft_len = (0 if executor_config.speculative_config is None else
                      executor_config.speculative_config.max_draft_len)
     return TorchSampler.Args(
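
Both call sites apply the same bump, one guarded by kv_cache_manager and one by executor_config.kv_cache_config. A hedged consistency sketch (bumped_capacity is a hypothetical stand-in for the duplicated guard, not part of the commit) showing that the sampler is never sized smaller than the slot manager:

    def bumped_capacity(base: int, has_kv_cache: bool) -> int:
        # The guard duplicated across both hunks.
        return base + 1 if base == 1 and has_kv_cache else base

    for base in (1, 2, 16):
        for has_kv in (False, True):
            slot_capacity = bumped_capacity(base, has_kv)     # SeqSlotManager / scheduler
            sampler_capacity = bumped_capacity(base, has_kv)  # TorchSampler.Args
            assert slot_capacity == sampler_capacity

If only one site were bumped, the sampler and the slot manager would disagree on capacity in the bs == 1 disaggregated case, which is why the guard appears in both functions.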
