Skip to content

Commit a90b238

Browse files
remove unnecessary api expose
Signed-off-by: Jaedeok Kim <[email protected]>
1 parent 63ea4c6 commit a90b238

File tree

3 files changed: +2 additions, −19 deletions

cpp/include/tensorrt_llm/batch_manager/llmRequest.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -472,11 +472,6 @@ class GenericLlmRequest
472472
mExcludeInputFromOutput = exclude;
473473
}
474474

475-
bool getExcludeInputFromOutput()
476-
{
477-
return mExcludeInputFromOutput;
478-
}
479-
480475
/// @brief Get the params of the context
481476
/// @return The params of the context
482477
[[nodiscard]] std::optional<executor::ContextPhaseParams> const& getContextPhaseParams() const noexcept
@@ -774,11 +769,6 @@ class GenericLlmRequest
774769
return mParentRequestId;
775770
}
776771

777-
[[nodiscard]] SizeType32 getSequenceIndex() const
778-
{
779-
return mSequenceIndex;
780-
}
781-
782772
/// @brief Return a vector of the last-generated tokens of shape [num_beams]
783773
[[nodiscard]] VecTokens const& getLastTokens()
784774
{
@@ -1866,11 +1856,6 @@ class GenericLlmRequest
18661856
// current position of the prompt tuning table (only used in chunked prefill mode)
18671857
SizeType32 mPtableCurrentPosition{0};
18681858

1869-
[[nodiscard]] std::shared_ptr<std::vector<bool>> getSequenceFinalVec() const
1870-
{
1871-
return mSequenceFinalVec;
1872-
}
1873-
18741859
protected:
18751860
bool mIsStreaming;
18761861

cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,6 @@ void initBindings(pybind11::module_& m)
113113
.def("set_generated_tokens", &GenLlmReq::setGeneratedTokens, py::arg("generated_beam_tokens"))
114114
.def("pause", &GenLlmReq::pause, py::arg("max_input_len"))
115115
.def_property("max_sent_token_len", &GenLlmReq::getMaxSentTokenLen, &GenLlmReq::setMaxSentTokenLen)
116-
.def_property(
117-
"exclude_input_from_output", &GenLlmReq::getExcludeInputFromOutput, &GenLlmReq::setExcludeInputFromOutput)
118116
.def_property_readonly("prompt_embedding_table", &GenLlmReq::getPromptEmbeddingTable)
119117
.def_property_readonly("multimodal_embedding", &GenLlmReq::getMultimodalEmbedding)
120118
.def_property_readonly("mrope_rotary_cos_sin", &GenLlmReq::getMropeRotaryCosSin)

tensorrt_llm/_torch/pyexecutor/llm_request.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -386,11 +386,11 @@ def create_child_request(self, request_id: int):
386386
child_request.py_request_id = child_request.request_id
387387
child_request.py_llm_request_type = child_request.llm_request_type
388388
child_request.py_batch_idx = None
389+
390+
# Mimic the behavior of the original LlmRequest.
389391
child_request.is_attention_dp_dummy = self.is_attention_dp_dummy
390392
child_request.is_cuda_graph_dummy = self.is_cuda_graph_dummy
391393
child_request.is_dummy = self.is_dummy
392-
393-
# Mimic the behavior of the original LlmRequest.
394394
child_request.create_response = partial(create_response, child_request)
395395
child_request.finish_by = partial(finish_by, child_request)
396396

0 commit comments

Comments (0)