31
31
#include < filesystem>
32
32
#include < algorithm>
33
33
34
+ static const char * RPC_DEBUG = std::getenv(" GGML_RPC_DEBUG" );
35
+
36
+ #define LOG_DBG (...) \
37
+ do { if (RPC_DEBUG) GGML_LOG_DEBUG (__VA_ARGS__); } while (0 )
38
+
39
+
34
40
namespace fs = std::filesystem;
35
41
36
42
static constexpr size_t MAX_CHUNK_SIZE = 1024ull * 1024ull * 1024ull ; // 1 GiB
@@ -47,7 +53,7 @@ struct socket_t {
47
53
sockfd_t fd;
48
54
socket_t (sockfd_t fd) : fd(fd) {}
49
55
~socket_t () {
50
- GGML_PRINT_DEBUG (" [%s] closing socket %d\n " , __func__, this ->fd );
56
+ LOG_DBG (" [%s] closing socket %d\n " , __func__, this ->fd );
51
57
#ifdef _WIN32
52
58
closesocket (this ->fd );
53
59
#else
@@ -265,14 +271,14 @@ static std::shared_ptr<socket_t> socket_connect(const char * host, int port) {
265
271
return nullptr ;
266
272
}
267
273
if (!set_no_delay (sockfd)) {
268
- fprintf (stderr, " Failed to set TCP_NODELAY\n " );
274
+ GGML_LOG_ERROR ( " Failed to set TCP_NODELAY\n " );
269
275
return nullptr ;
270
276
}
271
277
addr.sin_family = AF_INET;
272
278
addr.sin_port = htons (port);
273
279
struct hostent * server = gethostbyname (host);
274
280
if (server == NULL ) {
275
- fprintf (stderr, " Cannot resolve host '%s'\n " , host);
281
+ GGML_LOG_ERROR ( " Cannot resolve host '%s'\n " , host);
276
282
return nullptr ;
277
283
}
278
284
memcpy (&addr.sin_addr .s_addr , server->h_addr , server->h_length );
@@ -289,7 +295,7 @@ static std::shared_ptr<socket_t> socket_accept(sockfd_t srv_sockfd) {
289
295
return nullptr ;
290
296
}
291
297
if (!set_no_delay (client_socket_fd)) {
292
- fprintf (stderr, " Failed to set TCP_NODELAY\n " );
298
+ GGML_LOG_ERROR ( " Failed to set TCP_NODELAY\n " );
293
299
return nullptr ;
294
300
}
295
301
return client_socket;
@@ -302,11 +308,11 @@ static std::shared_ptr<socket_t> create_server_socket(const char * host, int por
302
308
return nullptr ;
303
309
}
304
310
if (!set_reuse_addr (sockfd)) {
305
- fprintf (stderr, " Failed to set SO_REUSEADDR\n " );
311
+ GGML_LOG_ERROR ( " Failed to set SO_REUSEADDR\n " );
306
312
return nullptr ;
307
313
}
308
314
if (inet_addr (host) == INADDR_NONE) {
309
- fprintf (stderr, " Invalid host address: %s\n " , host);
315
+ GGML_LOG_ERROR ( " Invalid host address: %s\n " , host);
310
316
return nullptr ;
311
317
}
312
318
struct sockaddr_in serv_addr;
@@ -349,7 +355,7 @@ static bool recv_data(sockfd_t sockfd, void * data, size_t size) {
349
355
return false ;
350
356
}
351
357
if (n == 0 ) {
352
- GGML_LOG_ERROR (" recv returned 0 (peer closed?)\n " );
358
+ LOG_DBG (" recv returned 0 (peer closed?)\n " );
353
359
return false ;
354
360
}
355
361
bytes_recv += (size_t )n;
@@ -383,7 +389,7 @@ static bool recv_msg(sockfd_t sockfd, std::vector<uint8_t> & input) {
383
389
try {
384
390
input.resize (size);
385
391
} catch (const std::bad_alloc & e) {
386
- fprintf (stderr, " Failed to allocate input buffer of size %" PRIu64 " \n " , size);
392
+ GGML_LOG_ERROR ( " Failed to allocate input buffer of size %" PRIu64 " \n " , size);
387
393
return false ;
388
394
}
389
395
return recv_data (sockfd, input.data (), size);
@@ -443,11 +449,11 @@ static bool check_server_version(const std::shared_ptr<socket_t> & sock) {
443
449
bool status = send_rpc_cmd (sock, RPC_CMD_HELLO, nullptr , 0 , &response, sizeof (response));
444
450
RPC_STATUS_ASSERT (status);
445
451
if (response.major != RPC_PROTO_MAJOR_VERSION || response.minor > RPC_PROTO_MINOR_VERSION) {
446
- fprintf (stderr, " RPC server version mismatch: %d.%d.%d\n " , response.major , response.minor , response.patch );
452
+ GGML_LOG_ERROR ( " RPC server version mismatch: %d.%d.%d\n " , response.major , response.minor , response.patch );
447
453
return false ;
448
454
}
449
455
if (response.minor != RPC_PROTO_MINOR_VERSION || response.patch != RPC_PROTO_PATCH_VERSION) {
450
- fprintf (stderr, " WARNING: RPC server version mismatch: %d.%d.%d\n " , response.major , response.minor , response.patch );
456
+ GGML_LOG_INFO ( " WARNING: RPC server version mismatch: %d.%d.%d\n " , response.major , response.minor , response.patch );
451
457
}
452
458
return true ;
453
459
}
@@ -488,7 +494,7 @@ static std::shared_ptr<socket_t> get_socket(const std::string & endpoint) {
488
494
if (!check_server_version (sock)) {
489
495
return nullptr ;
490
496
}
491
- GGML_PRINT_DEBUG (" [%s] connected to %s, sockfd=%d\n " , __func__, endpoint.c_str (), sock->fd );
497
+ LOG_DBG (" [%s] connected to %s, sockfd=%d\n " , __func__, endpoint.c_str (), sock->fd );
492
498
sockets[endpoint] = sock;
493
499
return sock;
494
500
}
@@ -809,7 +815,7 @@ ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint) {
809
815
}
810
816
auto sock = get_socket (endpoint);
811
817
if (sock == nullptr ) {
812
- fprintf (stderr, " Failed to connect to %s\n " , endpoint);
818
+ GGML_LOG_ERROR ( " Failed to connect to %s\n " , endpoint);
813
819
return nullptr ;
814
820
}
815
821
size_t alignment = get_alignment (sock);
@@ -909,7 +915,7 @@ void rpc_server::hello(rpc_msg_hello_rsp & response) {
909
915
response.major = RPC_PROTO_MAJOR_VERSION;
910
916
response.minor = RPC_PROTO_MINOR_VERSION;
911
917
response.patch = RPC_PROTO_PATCH_VERSION;
912
- GGML_PRINT_DEBUG (" [%s] version: %d.%d.%d\n " , __func__, response.major , response.minor , response.patch );
918
+ LOG_DBG (" [%s] version: %d.%d.%d\n " , __func__, response.major , response.minor , response.patch );
913
919
}
914
920
915
921
bool rpc_server::get_alloc_size (const rpc_msg_get_alloc_size_req & request, rpc_msg_get_alloc_size_rsp & response) {
@@ -929,15 +935,15 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_
929
935
GGML_LOG_ERROR (" Null tensor pointer passed to server get_alloc_size function.\n " );
930
936
return false ;
931
937
}
932
-
938
+ LOG_DBG ( " [%s] buffer: %p, data: %p \n " , __func__, ( void *)tensor-> buffer , tensor-> data );
933
939
if (tensor->buffer == nullptr ) {
934
940
// No buffer allocated.
935
941
buft = ggml_backend_get_default_buffer_type (backend);
936
942
} else {
937
943
buft = tensor->buffer ->buft ;
938
944
}
939
945
940
- response.alloc_size = ggml_backend_buft_get_alloc_size (buft,tensor);
946
+ response.alloc_size = ggml_backend_buft_get_alloc_size (buft, tensor);
941
947
942
948
return true ;
943
949
}
@@ -950,29 +956,29 @@ void rpc_server::alloc_buffer(const rpc_msg_alloc_buffer_req & request, rpc_msg_
950
956
if (buffer != nullptr ) {
951
957
response.remote_ptr = reinterpret_cast <uint64_t >(buffer);
952
958
response.remote_size = buffer->size ;
953
- GGML_PRINT_DEBUG (" [%s] size: %" PRIu64 " -> remote_ptr: %" PRIx64 " , remote_size: %" PRIu64 " \n " , __func__, request.size , response.remote_ptr , response.remote_size );
959
+ LOG_DBG (" [%s] size: %" PRIu64 " -> remote_ptr: %" PRIx64 " , remote_size: %" PRIu64 " \n " , __func__, request.size , response.remote_ptr , response.remote_size );
954
960
buffers.insert (buffer);
955
961
} else {
956
- GGML_LOG_ERROR (" [%s] size: %" PRIu64 " -> failed\n " , __func__, request.size );
962
+ LOG_DBG (" [%s] size: %" PRIu64 " -> failed\n " , __func__, request.size );
957
963
}
958
964
}
959
965
960
966
void rpc_server::get_alignment (rpc_msg_get_alignment_rsp & response) {
961
967
ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type (backend);
962
968
size_t alignment = ggml_backend_buft_get_alignment (buft);
963
- GGML_PRINT_DEBUG (" [%s] alignment: %lu\n " , __func__, alignment);
969
+ LOG_DBG (" [%s] alignment: %lu\n " , __func__, alignment);
964
970
response.alignment = alignment;
965
971
}
966
972
967
973
void rpc_server::get_max_size (rpc_msg_get_max_size_rsp & response) {
968
974
ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type (backend);
969
975
size_t max_size = ggml_backend_buft_get_max_size (buft);
970
- GGML_PRINT_DEBUG (" [%s] max_size: %lu\n " , __func__, max_size);
976
+ LOG_DBG (" [%s] max_size: %lu\n " , __func__, max_size);
971
977
response.max_size = max_size;
972
978
}
973
979
974
980
bool rpc_server::buffer_get_base (const rpc_msg_buffer_get_base_req & request, rpc_msg_buffer_get_base_rsp & response) {
975
- GGML_PRINT_DEBUG (" [%s] remote_ptr: %" PRIx64 " \n " , __func__, request.remote_ptr );
981
+ LOG_DBG (" [%s] remote_ptr: %" PRIx64 " \n " , __func__, request.remote_ptr );
976
982
ggml_backend_buffer_t buffer = reinterpret_cast <ggml_backend_buffer_t >(request.remote_ptr );
977
983
if (buffers.find (buffer) == buffers.end ()) {
978
984
GGML_LOG_ERROR (" [%s] buffer not found\n " , __func__);
@@ -984,7 +990,7 @@ bool rpc_server::buffer_get_base(const rpc_msg_buffer_get_base_req & request, rp
984
990
}
985
991
986
992
bool rpc_server::free_buffer (const rpc_msg_free_buffer_req & request) {
987
- GGML_PRINT_DEBUG (" [%s] remote_ptr: %" PRIx64 " \n " , __func__, request.remote_ptr );
993
+ LOG_DBG (" [%s] remote_ptr: %" PRIx64 " \n " , __func__, request.remote_ptr );
988
994
ggml_backend_buffer_t buffer = reinterpret_cast <ggml_backend_buffer_t >(request.remote_ptr );
989
995
if (buffers.find (buffer) == buffers.end ()) {
990
996
GGML_LOG_ERROR (" [%s] buffer not found\n " , __func__);
@@ -996,7 +1002,7 @@ bool rpc_server::free_buffer(const rpc_msg_free_buffer_req & request) {
996
1002
}
997
1003
998
1004
bool rpc_server::buffer_clear (const rpc_msg_buffer_clear_req & request) {
999
- GGML_PRINT_DEBUG (" [%s] remote_ptr: %" PRIx64 " , value: %u\n " , __func__, request.remote_ptr , request.value );
1005
+ LOG_DBG (" [%s] remote_ptr: %" PRIx64 " , value: %u\n " , __func__, request.remote_ptr , request.value );
1000
1006
ggml_backend_buffer_t buffer = reinterpret_cast <ggml_backend_buffer_t >(request.remote_ptr );
1001
1007
if (buffers.find (buffer) == buffers.end ()) {
1002
1008
GGML_LOG_ERROR (" [%s] buffer not found\n " , __func__);
@@ -1073,7 +1079,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
1073
1079
GGML_LOG_ERROR (" [%s] error deserializing tensor\n " , __func__);
1074
1080
return false ;
1075
1081
}
1076
- GGML_PRINT_DEBUG (" [%s] buffer: %p, data: %p, offset: %" PRIu64 " , size: %zu\n " , __func__, (void *)tensor->buffer , tensor->data , offset, size);
1082
+ LOG_DBG (" [%s] buffer: %p, data: %p, offset: %" PRIu64 " , size: %zu\n " , __func__, (void *)tensor->buffer , tensor->data , offset, size);
1077
1083
1078
1084
// sanitize tensor->data
1079
1085
{
@@ -1096,7 +1102,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
1096
1102
fs::path cache_file = fs::path (cache_dir) / hash_str;
1097
1103
std::ofstream ofs (cache_file, std::ios::binary);
1098
1104
ofs.write ((const char *)data, size);
1099
- printf (" [%s] saved to '%s'\n " , __func__, cache_file.c_str ());
1105
+ GGML_LOG_INFO (" [%s] saved to '%s'\n " , __func__, cache_file.c_str ());
1100
1106
}
1101
1107
ggml_backend_tensor_set (tensor, data, offset, size);
1102
1108
return true ;
@@ -1142,8 +1148,8 @@ bool rpc_server::set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rp
1142
1148
GGML_LOG_ERROR (" [%s] error deserializing tensor\n " , __func__);
1143
1149
return false ;
1144
1150
}
1145
- GGML_PRINT_DEBUG (" [%s] buffer: %p, data: %p, offset: %" PRIu64 " , size: %zu, hash: %" PRIx64 " \n " ,
1146
- __func__, (void *)tensor->buffer , tensor->data , request.offset , size, request.hash );
1151
+ LOG_DBG (" [%s] buffer: %p, data: %p, offset: %" PRIu64 " , size: %zu, hash: %" PRIx64 " \n " ,
1152
+ __func__, (void *)tensor->buffer , tensor->data , request.offset , size, request.hash );
1147
1153
1148
1154
// sanitize tensor->data
1149
1155
{
@@ -1177,7 +1183,7 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
1177
1183
GGML_LOG_ERROR (" Null tensor pointer passed to server init_tensor function.\n " );
1178
1184
return false ;
1179
1185
}
1180
-
1186
+ LOG_DBG ( " [%s] buffer: %p, data: %p \n " , __func__, ( void *)tensor-> buffer , tensor-> data );
1181
1187
// Call the backend's buffer_init_tensor function
1182
1188
ggml_backend_buffer_t buffer = tensor->buffer ;
1183
1189
if (buffer && buffer->iface .init_tensor ) {
@@ -1210,7 +1216,7 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
1210
1216
GGML_LOG_ERROR (" [%s] error deserializing tensor\n " , __func__);
1211
1217
return false ;
1212
1218
}
1213
- GGML_PRINT_DEBUG (" [%s] buffer: %p, data: %p, offset: %" PRIu64 " , size: %" PRIu64 " \n " , __func__, (void *)tensor->buffer , tensor->data , request.offset , request.size );
1219
+ LOG_DBG (" [%s] buffer: %p, data: %p, offset: %" PRIu64 " , size: %" PRIu64 " \n " , __func__, (void *)tensor->buffer , tensor->data , request.offset , request.size );
1214
1220
1215
1221
// sanitize tensor->data
1216
1222
{
@@ -1254,7 +1260,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
1254
1260
uint64_t dst_buf_sz = (uint64_t ) ggml_backend_buffer_get_size(dst->buffer);
1255
1261
1256
1262
if (dst_data + src_size > dst_base + dst_buf_sz) {
1257
- GGML_PRINT_DEBUG (" [%s] out-of-bounds write in rpc_server::copy_tensor:\n "
1263
+ GGML_LOG_ERROR (" [%s] out-of-bounds write in rpc_server::copy_tensor:\n "
1258
1264
" write range : [0x%" PRIx64 " , 0x%" PRIx64 " ]\n "
1259
1265
" buffer base: [0x%" PRIx64 " , 0x%" PRIx64 " ]\n " ,
1260
1266
__func__,
@@ -1265,8 +1271,8 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
1265
1271
return false ;
1266
1272
}
1267
1273
1268
- GGML_PRINT_DEBUG (" [%s] src->buffer: %p, dst->buffer: %p\n " ,
1269
- __func__, (void *) src->buffer, (void *) dst->buffer);
1274
+ LOG_DBG (" [%s] src->buffer: %p, dst->buffer: %p\n " ,
1275
+ __func__, (void *) src->buffer, (void *) dst->buffer);
1270
1276
1271
1277
response.result = ggml_backend_buffer_copy_tensor(src, dst);
1272
1278
return true ;
@@ -1342,7 +1348,7 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
1342
1348
return false ;
1343
1349
}
1344
1350
const rpc_tensor * tensors = (const rpc_tensor *)(input.data () + sizeof (n_nodes) + n_nodes*sizeof (uint64_t ) + sizeof (n_tensors));
1345
- GGML_PRINT_DEBUG (" [%s] n_nodes: %u, n_tensors: %u\n " , __func__, n_nodes, n_tensors);
1351
+ LOG_DBG (" [%s] n_nodes: %u, n_tensors: %u\n " , __func__, n_nodes, n_tensors);
1346
1352
1347
1353
size_t buf_size = ggml_tensor_overhead ()*(n_nodes + n_tensors) + ggml_graph_overhead_custom (n_nodes, false );
1348
1354
@@ -1394,7 +1400,7 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
1394
1400
}
1395
1401
// the first command sent by the client must be HELLO
1396
1402
if (cmd != RPC_CMD_HELLO) {
1397
- fprintf (stderr, " Expected HELLO command, update client\n " );
1403
+ GGML_LOG_ERROR ( " Expected HELLO command, update client\n " );
1398
1404
return ;
1399
1405
}
1400
1406
if (!recv_msg (sockfd, nullptr , 0 )) {
@@ -1411,7 +1417,7 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
1411
1417
}
1412
1418
if (cmd >= RPC_CMD_COUNT) {
1413
1419
// fail fast if the command is invalid
1414
- fprintf (stderr, " Unknown command: %d\n " , cmd);
1420
+ GGML_LOG_ERROR ( " Unknown command: %d\n " , cmd);
1415
1421
break ;
1416
1422
}
1417
1423
switch (cmd) {
@@ -1599,7 +1605,7 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
1599
1605
break ;
1600
1606
}
1601
1607
default : {
1602
- fprintf (stderr, " Unknown command: %d\n " , cmd);
1608
+ GGML_LOG_ERROR ( " Unknown command: %d\n " , cmd);
1603
1609
return ;
1604
1610
}
1605
1611
}
0 commit comments