Skip to content

Commit f6c7b25

Browse files
committed
fix: multi-GPU gather responses
Signed-off-by: Olya Kozlova <[email protected]>
1 parent 9c3525f commit f6c7b25

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

tensorrt_llm/_torch/pyexecutor/executor_request_queue.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,15 +123,15 @@ def _get_from_waiting_queue(
123123
req_item.child_req_ids) if req_item.child_req_ids else 0
124124
if (req_count + 1 + num_children) > max_req_count:
125125
break
126-
req_count += 1 + num_children
127126
req_item = waiting_queue.popleft()
128127
can_process = self._can_process_attention_dp_request(
129128
req_item, scheduling_all_ranks_num_active_requests
130129
) if enable_attention_dp else True
131130

132131
if can_process:
133132
items.append(req_item)
134-
else:
133+
req_count += 1 + num_children
134+
else:
135135
pending_requests.append(req_item)
136136

137137
# Put the pending requests back to the waiting queue

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1451,7 +1451,7 @@ def _enqueue_responses(self, responses: List[Tuple[int, LlmResponse]]):
14511451
if responses_list is not None:
14521452
for resp in responses_list:
14531453
if resp is not None:
1454-
gather_responses.append(resp)
1454+
gather_responses.extend(resp)
14551455
responses = gather_responses
14561456
logger.debug(
14571457
f'after gather, rank = {self.dist.rank}, responses = {responses}')

0 commit comments

Comments (0)