Skip to content

Commit 390efeb

Browse files
committed
no conflicts
1 parent 0694345 commit 390efeb

File tree

1 file changed

+41
-35
lines changed

1 file changed

+41
-35
lines changed

ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 41 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@
3333
#include <filesystem>
3434
#include <algorithm>
3535

36+
static const char * RPC_DEBUG = std::getenv("GGML_RPC_DEBUG");
37+
38+
#define LOG_DBG(...) \
39+
do { if (RPC_DEBUG) GGML_LOG_DEBUG(__VA_ARGS__); } while (0)
40+
41+
3642
namespace fs = std::filesystem;
3743

3844
static constexpr size_t MAX_CHUNK_SIZE = 1024ull * 1024ull * 1024ull; // 1 GiB
@@ -49,7 +55,7 @@ struct socket_t {
4955
sockfd_t fd;
5056
socket_t(sockfd_t fd) : fd(fd) {}
5157
~socket_t() {
52-
GGML_PRINT_DEBUG("[%s] closing socket %d\n", __func__, this->fd);
58+
LOG_DBG("[%s] closing socket %d\n", __func__, this->fd);
5359
#ifdef _WIN32
5460
closesocket(this->fd);
5561
#else
@@ -277,14 +283,14 @@ static std::shared_ptr<socket_t> socket_connect(const char * host, int port) {
277283
return nullptr;
278284
}
279285
if (!set_no_delay(sockfd)) {
280-
fprintf(stderr, "Failed to set TCP_NODELAY\n");
286+
GGML_LOG_ERROR("Failed to set TCP_NODELAY\n");
281287
return nullptr;
282288
}
283289
addr.sin_family = AF_INET;
284290
addr.sin_port = htons(port);
285291
struct hostent * server = gethostbyname(host);
286292
if (server == NULL) {
287-
fprintf(stderr, "Cannot resolve host '%s'\n", host);
293+
GGML_LOG_ERROR("Cannot resolve host '%s'\n", host);
288294
return nullptr;
289295
}
290296
memcpy(&addr.sin_addr.s_addr, server->h_addr, server->h_length);
@@ -301,7 +307,7 @@ static std::shared_ptr<socket_t> socket_accept(sockfd_t srv_sockfd) {
301307
return nullptr;
302308
}
303309
if (!set_no_delay(client_socket_fd)) {
304-
fprintf(stderr, "Failed to set TCP_NODELAY\n");
310+
GGML_LOG_ERROR("Failed to set TCP_NODELAY\n");
305311
return nullptr;
306312
}
307313
return client_socket;
@@ -314,11 +320,11 @@ static std::shared_ptr<socket_t> create_server_socket(const char * host, int por
314320
return nullptr;
315321
}
316322
if (!set_reuse_addr(sockfd)) {
317-
fprintf(stderr, "Failed to set SO_REUSEADDR\n");
323+
GGML_LOG_ERROR("Failed to set SO_REUSEADDR\n");
318324
return nullptr;
319325
}
320326
if (inet_addr(host) == INADDR_NONE) {
321-
fprintf(stderr, "Invalid host address: %s\n", host);
327+
GGML_LOG_ERROR("Invalid host address: %s\n", host);
322328
return nullptr;
323329
}
324330
struct sockaddr_in serv_addr;
@@ -361,7 +367,7 @@ static bool recv_data(sockfd_t sockfd, void * data, size_t size) {
361367
return false;
362368
}
363369
if (n == 0) {
364-
GGML_LOG_ERROR("recv returned 0 (peer closed?)\n");
370+
LOG_DBG("recv returned 0 (peer closed?)\n");
365371
return false;
366372
}
367373
bytes_recv += (size_t)n;
@@ -395,7 +401,7 @@ static bool recv_msg(sockfd_t sockfd, std::vector<uint8_t> & input) {
395401
try {
396402
input.resize(size);
397403
} catch (const std::bad_alloc & e) {
398-
fprintf(stderr, "Failed to allocate input buffer of size %" PRIu64 "\n", size);
404+
GGML_LOG_ERROR("Failed to allocate input buffer of size %" PRIu64 "\n", size);
399405
return false;
400406
}
401407
return recv_data(sockfd, input.data(), size);
@@ -479,11 +485,11 @@ static bool check_server_version(const std::shared_ptr<socket_t> & sock) {
479485
status = send_rpc_cmd(sock, RPC_CMD_HELLO, nullptr, 0, &response, sizeof(response));
480486
RPC_STATUS_ASSERT(status);
481487
if (response.major != RPC_PROTO_MAJOR_VERSION || response.minor > RPC_PROTO_MINOR_VERSION) {
482-
fprintf(stderr, "RPC server version mismatch: %d.%d.%d\n", response.major, response.minor, response.patch);
488+
GGML_LOG_ERROR("RPC server version mismatch: %d.%d.%d\n", response.major, response.minor, response.patch);
483489
return false;
484490
}
485491
if (response.minor != RPC_PROTO_MINOR_VERSION || response.patch != RPC_PROTO_PATCH_VERSION) {
486-
fprintf(stderr, "WARNING: RPC server version mismatch: %d.%d.%d\n", response.major, response.minor, response.patch);
492+
GGML_LOG_INFO("WARNING: RPC server version mismatch: %d.%d.%d\n", response.major, response.minor, response.patch);
487493
}
488494
return true;
489495
}
@@ -524,7 +530,7 @@ static std::shared_ptr<socket_t> get_socket(const std::string & endpoint) {
524530
if (!check_server_version(sock)) {
525531
return nullptr;
526532
}
527-
GGML_PRINT_DEBUG("[%s] connected to %s, sockfd=%d\n", __func__, endpoint.c_str(), sock->fd);
533+
LOG_DBG("[%s] connected to %s, sockfd=%d\n", __func__, endpoint.c_str(), sock->fd);
528534
sockets[endpoint] = sock;
529535
return sock;
530536
}
@@ -845,7 +851,7 @@ ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint) {
845851
}
846852
auto sock = get_socket(endpoint);
847853
if (sock == nullptr) {
848-
fprintf(stderr, "Failed to connect to %s\n", endpoint);
854+
GGML_LOG_ERROR("Failed to connect to %s\n", endpoint);
849855
return nullptr;
850856
}
851857
size_t alignment = get_alignment(sock);
@@ -954,7 +960,7 @@ void rpc_server::hello(rpc_msg_hello_rsp & response) {
954960
response.major = RPC_PROTO_MAJOR_VERSION;
955961
response.minor = RPC_PROTO_MINOR_VERSION;
956962
response.patch = RPC_PROTO_PATCH_VERSION;
957-
GGML_PRINT_DEBUG("[%s] version: %d.%d.%d\n", __func__, response.major, response.minor, response.patch);
963+
LOG_DBG("[%s] version: %d.%d.%d\n", __func__, response.major, response.minor, response.patch);
958964
}
959965

960966
bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_msg_get_alloc_size_rsp & response) {
@@ -974,15 +980,15 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_
974980
GGML_LOG_ERROR("Null tensor pointer passed to server get_alloc_size function.\n");
975981
return false;
976982
}
977-
983+
LOG_DBG("[%s] buffer: %p, data: %p\n", __func__, (void*)tensor->buffer, tensor->data);
978984
if (tensor->buffer == nullptr) {
979985
//No buffer allocated.
980986
buft = ggml_backend_get_default_buffer_type(backend);
981987
} else {
982988
buft = tensor->buffer->buft;
983989
}
984990

985-
response.alloc_size = ggml_backend_buft_get_alloc_size(buft,tensor);
991+
response.alloc_size = ggml_backend_buft_get_alloc_size(buft, tensor);
986992

987993
return true;
988994
}
@@ -996,30 +1002,30 @@ void rpc_server::alloc_buffer(const rpc_msg_alloc_buffer_req & request, rpc_msg_
9961002
uint64_t rpk = random_id();
9971003
response.remote_ptr = rpk;
9981004
response.remote_size = buffer->size;
999-
GGML_PRINT_DEBUG("[%s] size: %" PRIu64 " -> handle: %" PRIu64 ", remote_size: %" PRIu64 "\n",
1005+
LOG_DBG("[%s] size: %" PRIu64 " -> handle: %" PRIu64 ", remote_size: %" PRIu64 "\n",
10001006
__func__, request.size, rpk, response.remote_size);
10011007
buffers[rpk] = buffer;
10021008
} else {
1003-
GGML_LOG_ERROR("[%s] size: %" PRIu64 " -> failed\n", __func__, request.size);
1009+
LOG_DBG("[%s] size: %" PRIu64 " -> failed\n", __func__, request.size);
10041010
}
10051011
}
10061012

10071013
void rpc_server::get_alignment(rpc_msg_get_alignment_rsp & response) {
10081014
ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend);
10091015
size_t alignment = ggml_backend_buft_get_alignment(buft);
1010-
GGML_PRINT_DEBUG("[%s] alignment: %lu\n", __func__, alignment);
1016+
LOG_DBG("[%s] alignment: %lu\n", __func__, alignment);
10111017
response.alignment = alignment;
10121018
}
10131019

10141020
void rpc_server::get_max_size(rpc_msg_get_max_size_rsp & response) {
10151021
ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend);
10161022
size_t max_size = ggml_backend_buft_get_max_size(buft);
1017-
GGML_PRINT_DEBUG("[%s] max_size: %lu\n", __func__, max_size);
1023+
LOG_DBG("[%s] max_size: %lu\n", __func__, max_size);
10181024
response.max_size = max_size;
10191025
}
10201026

10211027
bool rpc_server::buffer_get_base(const rpc_msg_buffer_get_base_req & request, rpc_msg_buffer_get_base_rsp & response) {
1022-
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 "\n", __func__, request.remote_ptr);
1028+
LOG_DBG("[%s] remote_ptr: %" PRIx64 "\n", __func__, request.remote_ptr);
10231029
auto it = buffers.find(request.remote_ptr);
10241030
if (it == buffers.end()) {
10251031
GGML_LOG_ERROR("[%s] buffer handle not found: %" PRIu64 "\n", __func__, request.remote_ptr);
@@ -1032,7 +1038,7 @@ bool rpc_server::buffer_get_base(const rpc_msg_buffer_get_base_req & request, rp
10321038
}
10331039

10341040
bool rpc_server::free_buffer(const rpc_msg_free_buffer_req & request) {
1035-
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 "\n", __func__, request.remote_ptr);
1041+
LOG_DBG("[%s] remote_ptr: %" PRIx64 "\n", __func__, request.remote_ptr);
10361042
auto it = buffers.find(request.remote_ptr);
10371043
if (it == buffers.end()) {
10381044
GGML_LOG_ERROR("[%s] buffer handle not found: %" PRIu64 "\n", __func__, request.remote_ptr);
@@ -1045,7 +1051,7 @@ bool rpc_server::free_buffer(const rpc_msg_free_buffer_req & request) {
10451051
}
10461052

10471053
bool rpc_server::buffer_clear(const rpc_msg_buffer_clear_req & request) {
1048-
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 ", value: %u\n", __func__, request.remote_ptr, request.value);
1054+
LOG_DBG("[%s] remote_ptr: %" PRIx64 ", value: %u\n", __func__, request.remote_ptr, request.value);
10491055
auto it = buffers.find(request.remote_ptr);
10501056
if (it == buffers.end()) {
10511057
GGML_LOG_ERROR("[%s] buffer handle not found: %" PRIu64 "\n", __func__, request.remote_ptr);
@@ -1126,7 +1132,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
11261132
GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
11271133
return false;
11281134
}
1129-
GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
1135+
LOG_DBG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
11301136

11311137
// sanitize tensor->data
11321138
{
@@ -1149,7 +1155,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
11491155
fs::path cache_file = fs::path(cache_dir) / hash_str;
11501156
std::ofstream ofs(cache_file, std::ios::binary);
11511157
ofs.write((const char *)data, size);
1152-
printf("[%s] saved to '%s'\n", __func__, cache_file.c_str());
1158+
GGML_LOG_INFO("[%s] saved to '%s'\n", __func__, cache_file.c_str());
11531159
}
11541160
ggml_backend_tensor_set(tensor, data, offset, size);
11551161
return true;
@@ -1195,8 +1201,8 @@ bool rpc_server::set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rp
11951201
GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
11961202
return false;
11971203
}
1198-
GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu, hash: %" PRIx64 "\n",
1199-
__func__, (void*)tensor->buffer, tensor->data, request.offset, size, request.hash);
1204+
LOG_DBG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu, hash: %" PRIx64 "\n",
1205+
__func__, (void*)tensor->buffer, tensor->data, request.offset, size, request.hash);
12001206

12011207
// sanitize tensor->data
12021208
{
@@ -1230,7 +1236,7 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
12301236
GGML_LOG_ERROR("Null tensor pointer passed to server init_tensor function.\n");
12311237
return false;
12321238
}
1233-
1239+
LOG_DBG("[%s] buffer: %p, data: %p\n", __func__, (void*)tensor->buffer, tensor->data);
12341240
// Call the backend's buffer_init_tensor function
12351241
ggml_backend_buffer_t buffer = tensor->buffer;
12361242
if (buffer && buffer->iface.init_tensor) {
@@ -1263,7 +1269,7 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
12631269
GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
12641270
return false;
12651271
}
1266-
GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %" PRIu64 "\n", __func__, (void*)tensor->buffer, tensor->data, request.offset, request.size);
1272+
LOG_DBG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %" PRIu64 "\n", __func__, (void*)tensor->buffer, tensor->data, request.offset, request.size);
12671273

12681274
// sanitize tensor->data
12691275
{
@@ -1307,7 +1313,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
13071313
uint64_t dst_buf_sz = (uint64_t) ggml_backend_buffer_get_size(dst->buffer);
13081314

13091315
if (dst_data + src_size > dst_base + dst_buf_sz) {
1310-
GGML_PRINT_DEBUG("[%s] out-of-bounds write in rpc_server::copy_tensor:\n"
1316+
GGML_LOG_ERROR("[%s] out-of-bounds write in rpc_server::copy_tensor:\n"
13111317
" write range : [0x%" PRIx64 ", 0x%" PRIx64 "]\n"
13121318
" buffer base: [0x%" PRIx64 ", 0x%" PRIx64 "]\n",
13131319
__func__,
@@ -1318,8 +1324,8 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
13181324
return false;
13191325
}
13201326

1321-
GGML_PRINT_DEBUG("[%s] src->buffer: %p, dst->buffer: %p\n",
1322-
__func__, (void*) src->buffer, (void*) dst->buffer);
1327+
LOG_DBG("[%s] src->buffer: %p, dst->buffer: %p\n",
1328+
__func__, (void*) src->buffer, (void*) dst->buffer);
13231329

13241330
response.result = ggml_backend_buffer_copy_tensor(src, dst);
13251331
return true;
@@ -1395,7 +1401,7 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
13951401
return false;
13961402
}
13971403
const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t) + sizeof(n_tensors));
1398-
GGML_PRINT_DEBUG("[%s] n_nodes: %u, n_tensors: %u\n", __func__, n_nodes, n_tensors);
1404+
LOG_DBG("[%s] n_nodes: %u, n_tensors: %u\n", __func__, n_nodes, n_tensors);
13991405

14001406
size_t buf_size = ggml_tensor_overhead()*(n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
14011407

@@ -1518,7 +1524,7 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
15181524
}
15191525
// the second command sent by the client must be HELLO
15201526
if (cmd != RPC_CMD_HELLO) {
1521-
fprintf(stderr, "Expected HELLO command, update client\n");
1527+
GGML_LOG_ERROR("Expected HELLO command, update client\n");
15221528
return;
15231529
}
15241530
if (!recv_msg(sockfd, nullptr, 0)) {
@@ -1535,7 +1541,7 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
15351541
}
15361542
if (cmd >= RPC_CMD_COUNT) {
15371543
// fail fast if the command is invalid
1538-
fprintf(stderr, "Unknown command: %d\n", cmd);
1544+
GGML_LOG_ERROR("Unknown command: %d\n", cmd);
15391545
break;
15401546
}
15411547
switch (cmd) {
@@ -1723,7 +1729,7 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
17231729
break;
17241730
}
17251731
default: {
1726-
fprintf(stderr, "Unknown command: %d\n", cmd);
1732+
GGML_LOG_ERROR("Unknown command: %d\n", cmd);
17271733
return;
17281734
}
17291735
}

0 commit comments

Comments
 (0)