@@ -96,7 +96,7 @@ void MLACacheFormatter::format(TransferSession& session)
96
96
{
97
97
NVTX3_SCOPED_RANGE (MLACacheFormatter_format);
98
98
auto const & llmRequest = session.getLlmRequest ();
99
- TLLM_LOG_INFO (
99
+ TLLM_LOG_DEBUG (
100
100
mpi::MpiComm::world ().getRank (), " Start sending KV cache for request ID: %ld." , llmRequest.mRequestId );
101
101
auto const & selfConfig = session.getSelfState ().getCacheState ().value ();
102
102
auto const & destConfig = session.getOtherState ().getCacheState ().value ();
@@ -137,7 +137,7 @@ void MLACacheFormatter::format(TransferSession& session)
137
137
&& destConfig.getParallelConfig ().mPipelineParallelism == selfConfig.getParallelConfig ().mPipelineParallelism )
138
138
{
139
139
140
- TLLM_LOG_INFO (" Try using zero-copy for the KV cache." );
140
+ TLLM_LOG_DEBUG (" Try using zero-copy for the KV cache." );
141
141
NVTX3_SCOPED_RANGE (sendBufferFun);
142
142
143
143
TLLM_CUDA_CHECK (cudaSetDevice (deviceId));
@@ -149,7 +149,7 @@ void MLACacheFormatter::format(TransferSession& session)
149
149
}
150
150
}
151
151
152
- TLLM_LOG_INFO (mpi::MpiComm::world ().getRank (), " End the sending of KV cache for the request ID: %ld." ,
152
+ TLLM_LOG_DEBUG (mpi::MpiComm::world ().getRank (), " End the sending of KV cache for the request ID: %ld." ,
153
153
llmRequest.mRequestId );
154
154
155
155
return ;
@@ -251,7 +251,7 @@ void MLACacheFormatter::format(TransferSession& session)
251
251
{
252
252
if (!common::getEnvEnableReceiveKVCacheParallel ())
253
253
{
254
- TLLM_LOG_INFO (" Disable parallel receiving of the KV cache." );
254
+ TLLM_LOG_DEBUG (" Disable parallel receiving of the KV cache." );
255
255
for (size_t i = 0 ; i < connections.size (); i++)
256
256
{
257
257
sendBufferFun (deviceId, i);
@@ -289,7 +289,7 @@ void MLACacheFormatter::format(TransferSession& session)
289
289
}
290
290
mCacheTransBufferManager ->freeBufferIndexForSend (cacheBufferId);
291
291
292
- TLLM_LOG_INFO (
292
+ TLLM_LOG_DEBUG (
293
293
mpi::MpiComm::world ().getRank (), " End the sending of KV cache for the request ID: %ld." , llmRequest.mRequestId );
294
294
}
295
295
@@ -299,7 +299,7 @@ void MLACacheFormatter::unformat(TransferSession& session)
299
299
auto const & llmRequest = session.getLlmRequest ();
300
300
TLLM_CHECK_WITH_INFO (llmRequest.mSamplingConfig .beamWidth == 1 , " Currently only supports beam width 1." );
301
301
auto const ctxReqId = llmRequest.getContextPhaseParams ().value ().getReqId ();
302
- TLLM_LOG_INFO (mpi::MpiComm::world ().getRank (),
302
+ TLLM_LOG_DEBUG (mpi::MpiComm::world ().getRank (),
303
303
" Start receiving KV cache for request ID: %ld, context request ID: %ld." , llmRequest.mRequestId , ctxReqId);
304
304
auto const & selfConfig = session.getSelfState ().getCacheState ().value ();
305
305
auto const & destConfig = session.getOtherState ().getCacheState ().value ();
@@ -335,7 +335,7 @@ void MLACacheFormatter::unformat(TransferSession& session)
335
335
&& destConfig.getParallelConfig ().mPipelineParallelism == selfConfig.getParallelConfig ().mPipelineParallelism )
336
336
{
337
337
// recv
338
- TLLM_LOG_INFO (" Try zcopy for KV cache" );
338
+ TLLM_LOG_DEBUG (" Try zcopy for KV cache" );
339
339
NVTX3_SCOPED_RANGE (recvBufferFun);
340
340
TLLM_CUDA_CHECK (cudaSetDevice (deviceId));
341
341
TLLM_CHECK (pickUpConnections.size () == 1 );
@@ -347,7 +347,7 @@ void MLACacheFormatter::unformat(TransferSession& session)
347
347
session.recv (pickUpConnections[i], block->data (), block->getSizeInBytes ());
348
348
}
349
349
}
350
- TLLM_LOG_INFO (mpi::MpiComm::world ().getRank (),
350
+ TLLM_LOG_DEBUG (mpi::MpiComm::world ().getRank (),
351
351
" End receiving KV cache for request ID: %ld, context request ID: %ld." , llmRequest.mRequestId ,
352
352
llmRequest.getContextPhaseParams ().value ().getReqId ());
353
353
return ;
@@ -509,7 +509,7 @@ void MLACacheFormatter::unformat(TransferSession& session)
509
509
mCacheTransBufferManager ->freeBufferIndexForRecv (cacheBufferId);
510
510
}
511
511
512
- TLLM_LOG_INFO (mpi::MpiComm::world ().getRank (),
512
+ TLLM_LOG_DEBUG (mpi::MpiComm::world ().getRank (),
513
513
" End receiving KV cache for request ID: %ld, context request ID: %ld." , llmRequest.mRequestId ,
514
514
llmRequest.getContextPhaseParams ().value ().getReqId ());
515
515
}
0 commit comments