Commit f925fa9

add helpful comments
Signed-off-by: Venky Ganesh <[email protected]>
1 parent f8d0be7 commit f925fa9

File tree

4 files changed (+30 -19 lines changed):
- tensorrt_llm/executor/result.py
- tensorrt_llm/executor/worker.py
- tensorrt_llm/llmapi/llm.py
- tensorrt_llm/sampling_params.py


tensorrt_llm/executor/result.py

Lines changed: 12 additions & 9 deletions
@@ -244,13 +244,16 @@ def _handle_sequence(self,
         if response_tensors.cum_log_probs is not None:
             output.cumulative_logprob = response_tensors.cum_log_probs[src_idx]
 
-        if logprobs_result:
+        # prompt logprobs handling
+        if logprobs_result and logprobs_result.prompt is not None:  # both backends
+            output.prompt_logprobs = logprobs_result.prompt
+        # generation logprobs handling (provenance varies by backend)
+        if logprobs_result and logprobs_result.generation is not None:  # TRT backend
             # update logprobs from ResponseWrapper (TRT top logprobs WAR)
             output._last_logprobs_len = len(output.logprobs)
-            output.prompt_logprobs = logprobs_result.prompt
             output.logprobs += logprobs_result.generation
-        elif response_tensors.log_probs is not None:
-            # handle logprobs directly from response tensors
+        elif response_tensors.log_probs is not None:  # PyTorch backend
+            # handle logprobs directly from response tensors given by sampler
             output._last_logprobs_len = len(output.logprobs)
             output.logprobs = response_tensors.log_probs[src_idx]
             # overcome some WAR in the cpp executor
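For context on the two branches above: the handling splits LogProbsResult into its prompt and generation fields. A minimal sketch of that shape, based on the NamedTuple description in the docstring replaced below (the real Logprob type and class layout in result.py may differ):

from typing import Dict, List, NamedTuple, Optional

Logprob = float  # hypothetical stand-in; the real Logprob type lives in tensorrt_llm

class LogProbsResult(NamedTuple):
    # logprobs for prompt tokens: one {token_id: Logprob} dict per prompt position
    prompt: Optional[List[Dict[int, Logprob]]] = None
    # logprobs for generated tokens: one dict per generated position (TRT backend path)
    generation: Optional[List[Dict[int, Logprob]]] = None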
@@ -701,12 +704,12 @@ def compute_logprobs(
     output_token_ids: Optional[list[int]],
 ) -> LogProbsResult:
     """
-    Compute top-K logprobs and ranks for each token position.
+    Compute top-K logprobs from logits when engine doesn't provide them directly.
 
-    Returns:
-        LogProbsResult, a NamedTuple containing:
-            - prompt: Optional[List[Dict[token_id, Logprob]]] logprobs for prompt tokens.
-            - generation: Optional[List[Dict[token_id, Logprob]]] logprobs for generated tokens.
+    Used for post-processing logits into logprobs.
+    - Prompt logprobs (from context_logits): always used.
+    - Generation logprobs (from generation_logits, TRT backend): used when backend doesn't compute them in sampler (e.g., TRT).
+    - Generation logprobs (PyTorch backend): not used; computed in sampler, not here.
     """
 
     def _topk_logprobs(logits: torch.Tensor, top_k: int,
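The truncated _topk_logprobs helper above is where logits become per-position logprob dicts. A minimal illustrative sketch of that conversion, not the actual helper (its signature and return shape in result.py may differ):

from typing import Dict, List

import torch

def topk_logprobs_sketch(logits: torch.Tensor, top_k: int) -> List[Dict[int, float]]:
    """Illustrative only: turn [num_tokens, vocab_size] logits into per-position
    {token_id: logprob} dicts holding the top_k most likely tokens."""
    logprobs = torch.log_softmax(logits.float(), dim=-1)        # normalize logits to logprobs
    top_vals, top_ids = torch.topk(logprobs, k=top_k, dim=-1)   # top-k per token position
    return [
        {int(tok): float(val) for tok, val in zip(ids, vals)}
        for ids, vals in zip(top_ids, top_vals)
    ]

# example: 3 token positions over a 5-token vocabulary
print(topk_logprobs_sketch(torch.randn(3, 5), top_k=2))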

tensorrt_llm/executor/worker.py

Lines changed: 12 additions & 4 deletions
@@ -1056,7 +1056,11 @@ def _get_params_for_first_rsp(
 def _get_logprobs(worker,
                   response: Union[tllm.Response, LlmResponse],
                   is_pytorch_backend=False) -> Optional[LogProbsResult]:
-    """Compute logprob and prompt logprob and clear out logits if applicable.
+    """Compute logprobs from response logits when needed.
+
+    Logprobs provenance varies by backend:
+    - PyTorch: Generation logprobs computed in sampler, only prompt logprobs computed here
+    - TRT: Both prompt and generation logprobs computed here from logits
     """
 
     logprobs_result = None
@@ -1069,10 +1073,14 @@ def _get_logprobs(worker,
     if logprob_params:
         if is_pytorch_backend:
             if not logprob_params.prompt_logprobs:
-                # generation logprobs are already calculated in PyTorch backend sampler
+                # PyTorch: generation logprobs computed in sampler, no post-processing needed
                 return
             else:
-                # Fallback: compute from context_logits if available
+                # PyTorch: compute only prompt logprobs from context logits
+                # This can be done as a postprocessing step instead of coupling it to the
+                # pytorch engine, because prompt_logprobs calculation is not complicated by
+                # generation sampling strategies. Therefore it is simpler to do it here than
+                # doing it in the pytorch engine and plumbing it through the response.
                 logprobs_result = compute_logprobs(
                     logprob_params.prompt_logprobs, None,
                     response.result.context_logits, None, None)
@@ -1082,7 +1090,7 @@ def _get_logprobs(worker,
             response.clear_context_logits()
         return logprobs_result
 
-    # trt backend
+    # TRT backend: compute both prompt and generation logprobs from logits
     logprobs_result = compute_logprobs(logprob_params.prompt_logprobs,
                                        logprob_params.logprobs,
                                        response.result.context_logits,
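The provenance rules in the docstring above can be restated as a small standalone sketch; the function name and return format here are illustrative only, not part of worker.py:

from typing import Dict

def logprobs_to_postprocess(is_pytorch_backend: bool,
                            want_prompt_logprobs: bool,
                            want_generation_logprobs: bool) -> Dict[str, bool]:
    """Which logprobs still need to be derived from logits after the engine ran."""
    if is_pytorch_backend:
        # PyTorch: the sampler already produced generation logprobs; only prompt
        # logprobs are post-processed here, from context_logits.
        return {"prompt": want_prompt_logprobs, "generation": False}
    # TRT: both prompt and generation logprobs are computed here from logits.
    return {"prompt": want_prompt_logprobs, "generation": want_generation_logprobs}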

tensorrt_llm/llmapi/llm.py

Lines changed: 0 additions & 6 deletions
@@ -574,12 +574,6 @@ def _check_arguments(self, prompt_len: int, query_len: int,
                          is_gen_only: bool) -> None:
 
         if self.args.backend in ["pytorch", "_autodeploy"]:
-            # TODO: remove these checks after PyTorch backend
-            # fully support TopK prompt and generation logprobs.
-            # if sampling_params.prompt_logprobs:
-            #     raise ValueError(
-            #         f"`prompt_logprobs` in sampling_params is not supported in the PyTorch backend yet. Received `prompt_logprobs={sampling_params.prompt_logprobs}`. Please unset this field."
-            #     )
             if sampling_params.logprobs and sampling_params.logprobs > 1:
                 raise ValueError(
                     f"PyTorch backend currently only supports `logprobs=1`. Received `logprobs={sampling_params.logprobs}` (Top{sampling_params.logprobs} logprobs). Please set `logprobs=1` in `sampling_params` instead."

tensorrt_llm/sampling_params.py

Lines changed: 6 additions & 0 deletions
@@ -8,6 +8,7 @@
 from pydantic import BaseModel
 
 from tensorrt_llm.bindings import executor as tllme
+from tensorrt_llm.logger import logger
 
 
 @dataclass(slots=True, kw_only=True)
@@ -453,6 +454,11 @@ def _get_output_config(self, is_pytorch_backend: bool = False) -> tllme.OutputConfig:
         # we need to internally enable context logits for prompt logprobs computation
         # They will be dropped after computation if the user didn't explicitly request them
         if self.prompt_logprobs and not self.return_context_logits:
+            logger.info(
+                "Since prompt_logprobs is requested but return_context_logits is False, "
+                "internally enabling context logits for prompt logprobs computation. "
+                "context logits will be dropped after computation as the user didn't explicitly request them."
+            )
             config_kwargs["return_context_logits"] = True
         else:
             config_kwargs["return_log_probs"] = self._return_log_probs
