File tree Expand file tree Collapse file tree 3 files changed +2
-19
lines changed
include/tensorrt_llm/batch_manager
tensorrt_llm/pybind/batch_manager
tensorrt_llm/_torch/pyexecutor Expand file tree Collapse file tree 3 files changed +2
-19
lines changed Original file line number Diff line number Diff line change @@ -472,11 +472,6 @@ class GenericLlmRequest
472
472
mExcludeInputFromOutput = exclude;
473
473
}
474
474
475
- bool getExcludeInputFromOutput ()
476
- {
477
- return mExcludeInputFromOutput ;
478
- }
479
-
480
475
/// @brief Get the params of the context
481
476
/// @return The params of the context
482
477
[[nodiscard]] std::optional<executor::ContextPhaseParams> const & getContextPhaseParams () const noexcept
@@ -774,11 +769,6 @@ class GenericLlmRequest
774
769
return mParentRequestId ;
775
770
}
776
771
777
- [[nodiscard]] SizeType32 getSequenceIndex () const
778
- {
779
- return mSequenceIndex ;
780
- }
781
-
782
772
/// @brief Return a vector of the last-generated tokens of shape [num_beams]
783
773
[[nodiscard]] VecTokens const & getLastTokens ()
784
774
{
@@ -1866,11 +1856,6 @@ class GenericLlmRequest
1866
1856
// current position of the prompt tuning table (only used in chunked prefill mode)
1867
1857
SizeType32 mPtableCurrentPosition {0 };
1868
1858
1869
- [[nodiscard]] std::shared_ptr<std::vector<bool >> getSequenceFinalVec () const
1870
- {
1871
- return mSequenceFinalVec ;
1872
- }
1873
-
1874
1859
protected:
1875
1860
bool mIsStreaming ;
1876
1861
Original file line number Diff line number Diff line change @@ -113,8 +113,6 @@ void initBindings(pybind11::module_& m)
113
113
.def (" set_generated_tokens" , &GenLlmReq::setGeneratedTokens, py::arg (" generated_beam_tokens" ))
114
114
.def (" pause" , &GenLlmReq::pause, py::arg (" max_input_len" ))
115
115
.def_property (" max_sent_token_len" , &GenLlmReq::getMaxSentTokenLen, &GenLlmReq::setMaxSentTokenLen)
116
- .def_property (
117
- " exclude_input_from_output" , &GenLlmReq::getExcludeInputFromOutput, &GenLlmReq::setExcludeInputFromOutput)
118
116
.def_property_readonly (" prompt_embedding_table" , &GenLlmReq::getPromptEmbeddingTable)
119
117
.def_property_readonly (" multimodal_embedding" , &GenLlmReq::getMultimodalEmbedding)
120
118
.def_property_readonly (" mrope_rotary_cos_sin" , &GenLlmReq::getMropeRotaryCosSin)
Original file line number Diff line number Diff line change @@ -386,11 +386,11 @@ def create_child_request(self, request_id: int):
386
386
child_request .py_request_id = child_request .request_id
387
387
child_request .py_llm_request_type = child_request .llm_request_type
388
388
child_request .py_batch_idx = None
389
+
390
+ # Mimic the behavior of the original LlmRequest.
389
391
child_request .is_attention_dp_dummy = self .is_attention_dp_dummy
390
392
child_request .is_cuda_graph_dummy = self .is_cuda_graph_dummy
391
393
child_request .is_dummy = self .is_dummy
392
-
393
- # Mimic the behavior of the original LlmRequest.
394
394
child_request .create_response = partial (create_response , child_request )
395
395
child_request .finish_by = partial (finish_by , child_request )
396
396
You can’t perform that action at this time.
0 commit comments