
Commit faa0ec2

fix: multi-GPU gather responses
Signed-off-by: Olya Kozlova <[email protected]>
1 parent: 3902648

1 file changed (+1, -1)


tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 1 addition & 1 deletion
@@ -1451,7 +1451,7 @@ def _enqueue_responses(self, responses: List[Tuple[int, LlmResponse]]):
             if responses_list is not None:
                 for resp in responses_list:
                     if resp is not None:
-                        gather_responses.append(resp)
+                        gather_responses.extend(resp)
             responses = gather_responses
         logger.debug(
             f'after gather, rank = {self.dist.rank}, responses = {responses}')
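
For context, a minimal sketch of why extend is the right call here, assuming (as the diff suggests) that the cross-GPU gather returns one list of responses per rank. The data below is placeholder data, not the executor's actual LlmResponse objects: with append, each per-rank list would stay nested inside gather_responses; extend flattens them into a single list of individual responses.

# Hypothetical gathered result: one entry per rank, each entry a list of
# (request_id, response) placeholders; a rank with nothing to report is None.
responses_list = [
    [(0, "resp-a"), (1, "resp-b")],  # responses gathered from rank 0
    None,                            # rank 1 contributed nothing
    [(2, "resp-c")],                 # responses gathered from rank 2
]

gather_responses = []
for resp in responses_list:
    if resp is not None:
        # append(resp) would produce nested lists: [[...], [...]]
        # extend(resp) flattens each per-rank list into one combined list
        gather_responses.extend(resp)

print(gather_responses)
# [(0, 'resp-a'), (1, 'resp-b'), (2, 'resp-c')]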
