1 file changed: tensorrt_llm/_torch/pyexecutor (+8, −0)

```diff
@@ -512,6 +512,10 @@ def create_py_executor_instance(
         lora_config.trtllm_modules_to_hf_modules)
 
     max_num_sequences = executor_config.max_batch_size * mapping.pp_size
+    # When max_num_sequences == 1, the attention DP dummy request prevents DISAGG_GENERATION_INIT from being scheduled.
+    # Enlarge slot and scheduler capacity so DISAGG_GENERATION_INIT does not get stuck in the scheduler.
+    if max_num_sequences == 1 and kv_cache_manager:
+        max_num_sequences += 1
 
     resources[ResourceManagerType.SEQ_SLOT_MANAGER] = SeqSlotManager(
         max_num_sequences)
@@ -564,6 +568,10 @@ def create_py_executor_instance(
 def create_torch_sampler_args(executor_config: ExecutorConfig, mapping: Mapping,
                               *, max_seq_len: int, enable_mixed_sampler: bool):
     max_num_sequences = executor_config.max_batch_size * mapping.pp_size
+    # When max_num_sequences == 1, the attention DP dummy request prevents DISAGG_GENERATION_INIT from being scheduled.
+    # Enlarge the sampler size to align with the slot and scheduler capacity.
+    if max_num_sequences == 1 and executor_config.kv_cache_config:
+        max_num_sequences += 1
     max_draft_len = (0 if executor_config.speculative_config is None else
                      executor_config.speculative_config.max_draft_len)
     return TorchSampler.Args(
```
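Both hunks apply the same one-slot workaround: when `max_batch_size * pp_size == 1`, the attention DP dummy request occupies the only sequence slot, so a `DISAGG_GENERATION_INIT` request can never be admitted. Below is a minimal, self-contained sketch of that failure mode and the capacity bump; `TinyScheduler` and `padded_capacity` are illustrative stand-ins for this explanation, not TensorRT-LLM APIs.

```python
# Toy model of the patch's capacity-padding idea; names here are
# hypothetical and do not mirror TensorRT-LLM's real scheduler API.
from dataclasses import dataclass, field


@dataclass
class TinyScheduler:
    """A scheduler with a fixed number of sequence slots."""
    capacity: int
    active: list = field(default_factory=list)

    def try_schedule(self, request: str) -> bool:
        # A request is admitted only while a free slot exists.
        if len(self.active) < self.capacity:
            self.active.append(request)
            return True
        return False


def padded_capacity(max_batch_size: int, pp_size: int,
                    uses_attention_dp_dummy: bool) -> int:
    """Mirror of the patch: reserve one extra slot when a single-slot
    configuration would be permanently held by the dummy request."""
    max_num_sequences = max_batch_size * pp_size
    if max_num_sequences == 1 and uses_attention_dp_dummy:
        max_num_sequences += 1
    return max_num_sequences


# Without padding, the dummy request pins the only slot and the
# DISAGG_GENERATION_INIT request is stuck forever.
tight = TinyScheduler(capacity=1)
tight.try_schedule("attention_dp_dummy")
assert not tight.try_schedule("DISAGG_GENERATION_INIT")  # never scheduled

# With the extra slot, the init request is admitted alongside the dummy.
padded = TinyScheduler(capacity=padded_capacity(1, 1, True))
padded.try_schedule("attention_dp_dummy")
assert padded.try_schedule("DISAGG_GENERATION_INIT")  # scheduled
```

Note that the bump is applied in both `create_py_executor_instance` and `create_torch_sampler_args`: the seq-slot manager, scheduler, and sampler must agree on `max_num_sequences`, otherwise a request admitted by one component would have no corresponding slot in another.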