Skip to content

Commit d074aca

Browse files
committed
info->debug: lower the KV cache transfer log messages in MLACacheFormatter from INFO to DEBUG level
Signed-off-by: Chuang Zhu <[email protected]>
1 parent eae3346 commit d074aca

File tree

1 file changed: +9 -9 lines changed

1 file changed: +9 -9 lines changed

cpp/tensorrt_llm/batch_manager/mlaCacheFormatter.cpp

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,7 @@ void MLACacheFormatter::format(TransferSession& session)
9696
{
9797
NVTX3_SCOPED_RANGE(MLACacheFormatter_format);
9898
auto const& llmRequest = session.getLlmRequest();
99-
TLLM_LOG_INFO(
99+
TLLM_LOG_DEBUG(
100100
mpi::MpiComm::world().getRank(), "Start sending KV cache for request ID: %ld.", llmRequest.mRequestId);
101101
auto const& selfConfig = session.getSelfState().getCacheState().value();
102102
auto const& destConfig = session.getOtherState().getCacheState().value();
@@ -137,7 +137,7 @@ void MLACacheFormatter::format(TransferSession& session)
137137
&& destConfig.getParallelConfig().mPipelineParallelism == selfConfig.getParallelConfig().mPipelineParallelism)
138138
{
139139

140-
TLLM_LOG_INFO("Try using zero-copy for the KV cache.");
140+
TLLM_LOG_DEBUG("Try using zero-copy for the KV cache.");
141141
NVTX3_SCOPED_RANGE(sendBufferFun);
142142

143143
TLLM_CUDA_CHECK(cudaSetDevice(deviceId));
@@ -149,7 +149,7 @@ void MLACacheFormatter::format(TransferSession& session)
149149
}
150150
}
151151

152-
TLLM_LOG_INFO(mpi::MpiComm::world().getRank(), "End the sending of KV cache for the request ID: %ld.",
152+
TLLM_LOG_DEBUG(mpi::MpiComm::world().getRank(), "End the sending of KV cache for the request ID: %ld.",
153153
llmRequest.mRequestId);
154154

155155
return;
@@ -251,7 +251,7 @@ void MLACacheFormatter::format(TransferSession& session)
251251
{
252252
if (!common::getEnvEnableReceiveKVCacheParallel())
253253
{
254-
TLLM_LOG_INFO("Disable parallel receiving of the KV cache.");
254+
TLLM_LOG_DEBUG("Disable parallel receiving of the KV cache.");
255255
for (size_t i = 0; i < connections.size(); i++)
256256
{
257257
sendBufferFun(deviceId, i);
@@ -289,7 +289,7 @@ void MLACacheFormatter::format(TransferSession& session)
289289
}
290290
mCacheTransBufferManager->freeBufferIndexForSend(cacheBufferId);
291291

292-
TLLM_LOG_INFO(
292+
TLLM_LOG_DEBUG(
293293
mpi::MpiComm::world().getRank(), "End the sending of KV cache for the request ID: %ld.", llmRequest.mRequestId);
294294
}
295295

@@ -299,7 +299,7 @@ void MLACacheFormatter::unformat(TransferSession& session)
299299
auto const& llmRequest = session.getLlmRequest();
300300
TLLM_CHECK_WITH_INFO(llmRequest.mSamplingConfig.beamWidth == 1, "Currently only supports beam width 1.");
301301
auto const ctxReqId = llmRequest.getContextPhaseParams().value().getReqId();
302-
TLLM_LOG_INFO(mpi::MpiComm::world().getRank(),
302+
TLLM_LOG_DEBUG(mpi::MpiComm::world().getRank(),
303303
"Start receiving KV cache for request ID: %ld, context request ID: %ld.", llmRequest.mRequestId, ctxReqId);
304304
auto const& selfConfig = session.getSelfState().getCacheState().value();
305305
auto const& destConfig = session.getOtherState().getCacheState().value();
@@ -335,7 +335,7 @@ void MLACacheFormatter::unformat(TransferSession& session)
335335
&& destConfig.getParallelConfig().mPipelineParallelism == selfConfig.getParallelConfig().mPipelineParallelism)
336336
{
337337
// recv
338-
TLLM_LOG_INFO("Try zcopy for KV cache");
338+
TLLM_LOG_DEBUG("Try zcopy for KV cache");
339339
NVTX3_SCOPED_RANGE(recvBufferFun);
340340
TLLM_CUDA_CHECK(cudaSetDevice(deviceId));
341341
TLLM_CHECK(pickUpConnections.size() == 1);
@@ -347,7 +347,7 @@ void MLACacheFormatter::unformat(TransferSession& session)
347347
session.recv(pickUpConnections[i], block->data(), block->getSizeInBytes());
348348
}
349349
}
350-
TLLM_LOG_INFO(mpi::MpiComm::world().getRank(),
350+
TLLM_LOG_DEBUG(mpi::MpiComm::world().getRank(),
351351
"End receiving KV cache for request ID: %ld, context request ID: %ld.", llmRequest.mRequestId,
352352
llmRequest.getContextPhaseParams().value().getReqId());
353353
return;
@@ -509,7 +509,7 @@ void MLACacheFormatter::unformat(TransferSession& session)
509509
mCacheTransBufferManager->freeBufferIndexForRecv(cacheBufferId);
510510
}
511511

512-
TLLM_LOG_INFO(mpi::MpiComm::world().getRank(),
512+
TLLM_LOG_DEBUG(mpi::MpiComm::world().getRank(),
513513
"End receiving KV cache for request ID: %ld, context request ID: %ld.", llmRequest.mRequestId,
514514
llmRequest.getContextPhaseParams().value().getReqId());
515515
}

0 commit comments

Comments
 (0)