Skip to content

Commit dc68974

Browse files
metal : reusing llama.cpp logging (#3152)
* metal : reusing llama.cpp logging * cmake : build fix * metal : logging callback * metal : logging va_args memory fix * metal : minor cleanup * metal : setting function like logging macro to capital letters * llama.cpp : trailing whitespace fix * ggml : log level enum used by llama * Makefile : cleanup ggml-metal recipe * ggml : ggml_log_callback typedef * ggml : minor --------- Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 527e57c commit dc68974

File tree

6 files changed

+98
-67
lines changed

6 files changed

+98
-67
lines changed

examples/llama-bench/llama-bench.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -903,7 +903,7 @@ static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads)
903903
}
904904
}
905905

906-
static void llama_null_log_callback(enum llama_log_level level, const char * text, void * user_data) {
906+
static void llama_null_log_callback(enum ggml_log_level level, const char * text, void * user_data) {
907907
(void) level;
908908
(void) text;
909909
(void) user_data;

ggml-metal.h

+4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
#pragma once
2121

22+
#include "ggml.h"
23+
2224
#include <stddef.h>
2325
#include <stdbool.h>
2426

@@ -33,6 +35,8 @@ struct ggml_cgraph;
3335
extern "C" {
3436
#endif
3537

38+
void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
39+
3640
struct ggml_metal_context;
3741

3842
// number of command buffers to use

ggml-metal.m

+74-42
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,14 @@
1111
#define MIN(a, b) ((a) < (b) ? (a) : (b))
1212
#define MAX(a, b) ((a) > (b) ? (a) : (b))
1313

14-
// TODO: temporary - reuse llama.cpp logging
1514
#ifdef GGML_METAL_NDEBUG
16-
#define metal_printf(...)
15+
#define GGML_METAL_LOG_INFO(...)
16+
#define GGML_METAL_LOG_WARN(...)
17+
#define GGML_METAL_LOG_ERROR(...)
1718
#else
18-
#define metal_printf(...) fprintf(stderr, __VA_ARGS__)
19+
#define GGML_METAL_LOG_INFO(...) ggml_metal_log(GGML_LOG_LEVEL_INFO, __VA_ARGS__)
20+
#define GGML_METAL_LOG_WARN(...) ggml_metal_log(GGML_LOG_LEVEL_WARN, __VA_ARGS__)
21+
#define GGML_METAL_LOG_ERROR(...) ggml_metal_log(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
1922
#endif
2023

2124
#define UNUSED(x) (void)(x)
@@ -120,8 +123,37 @@ @interface GGMLMetalClass : NSObject
120123
@implementation GGMLMetalClass
121124
@end
122125

126+
ggml_log_callback ggml_metal_log_callback = NULL;
127+
void * ggml_metal_log_user_data = NULL;
128+
129+
void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data) {
130+
ggml_metal_log_callback = log_callback;
131+
ggml_metal_log_user_data = user_data;
132+
}
133+
134+
static void ggml_metal_log(enum ggml_log_level level, const char* format, ...){
135+
if (ggml_metal_log_callback != NULL) {
136+
va_list args;
137+
va_start(args, format);
138+
char buffer[128];
139+
int len = vsnprintf(buffer, 128, format, args);
140+
if (len < 128) {
141+
ggml_metal_log_callback(level, buffer, ggml_metal_log_user_data);
142+
} else {
143+
char* buffer2 = malloc(len+1);
144+
vsnprintf(buffer2, len+1, format, args);
145+
buffer2[len] = 0;
146+
ggml_metal_log_callback(level, buffer2, ggml_metal_log_user_data);
147+
free(buffer2);
148+
}
149+
va_end(args);
150+
}
151+
}
152+
153+
154+
123155
struct ggml_metal_context * ggml_metal_init(int n_cb) {
124-
metal_printf("%s: allocating\n", __func__);
156+
GGML_METAL_LOG_INFO("%s: allocating\n", __func__);
125157

126158
id <MTLDevice> device;
127159
NSString * s;
@@ -131,14 +163,14 @@ @implementation GGMLMetalClass
131163
NSArray * devices = MTLCopyAllDevices();
132164
for (device in devices) {
133165
s = [device name];
134-
metal_printf("%s: found device: %s\n", __func__, [s UTF8String]);
166+
GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [s UTF8String]);
135167
}
136168
#endif
137169

138170
// Pick and show default Metal device
139171
device = MTLCreateSystemDefaultDevice();
140172
s = [device name];
141-
metal_printf("%s: picking default device: %s\n", __func__, [s UTF8String]);
173+
GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [s UTF8String]);
142174

143175
// Configure context
144176
struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
@@ -165,7 +197,7 @@ @implementation GGMLMetalClass
165197
ctx->library = [ctx->device newLibraryWithURL:libURL error:&error];
166198

167199
if (error) {
168-
metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
200+
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
169201
return NULL;
170202
}
171203
}
@@ -179,11 +211,11 @@ @implementation GGMLMetalClass
179211
//NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"];
180212
NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
181213
NSString * path = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
182-
metal_printf("%s: loading '%s'\n", __func__, [path UTF8String]);
214+
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path UTF8String]);
183215

184216
NSString * src = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
185217
if (error) {
186-
metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
218+
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
187219
return NULL;
188220
}
189221

@@ -195,7 +227,7 @@ @implementation GGMLMetalClass
195227
ctx->library = [ctx->device newLibraryWithSource:src options:nil error:&error];
196228
#endif
197229
if (error) {
198-
metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
230+
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
199231
return NULL;
200232
}
201233
}
@@ -207,11 +239,11 @@ @implementation GGMLMetalClass
207239
#define GGML_METAL_ADD_KERNEL(name) \
208240
ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
209241
ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
210-
metal_printf("%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
242+
GGML_METAL_LOG_INFO("%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
211243
(int) ctx->pipeline_##name.maxTotalThreadsPerThreadgroup, \
212244
(int) ctx->pipeline_##name.threadExecutionWidth); \
213245
if (error) { \
214-
metal_printf("%s: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
246+
GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
215247
return NULL; \
216248
}
217249

@@ -270,21 +302,21 @@ @implementation GGMLMetalClass
270302
#undef GGML_METAL_ADD_KERNEL
271303
}
272304

273-
metal_printf("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
305+
GGML_METAL_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
274306
#if TARGET_OS_OSX
275-
metal_printf("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
307+
GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
276308
if (ctx->device.maxTransferRate != 0) {
277-
metal_printf("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
309+
GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
278310
} else {
279-
metal_printf("%s: maxTransferRate = built-in GPU\n", __func__);
311+
GGML_METAL_LOG_INFO("%s: maxTransferRate = built-in GPU\n", __func__);
280312
}
281313
#endif
282314

283315
return ctx;
284316
}
285317

286318
void ggml_metal_free(struct ggml_metal_context * ctx) {
287-
metal_printf("%s: deallocating\n", __func__);
319+
GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
288320
#define GGML_METAL_DEL_KERNEL(name) \
289321
[ctx->function_##name release]; \
290322
[ctx->pipeline_##name release];
@@ -360,7 +392,7 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
360392
void * data = NULL;
361393
const int result = posix_memalign((void **) &data, sysconf(_SC_PAGESIZE), n);
362394
if (result != 0) {
363-
metal_printf("%s: error: posix_memalign failed\n", __func__);
395+
GGML_METAL_LOG_ERROR("%s: error: posix_memalign failed\n", __func__);
364396
return NULL;
365397
}
366398

@@ -388,7 +420,7 @@ int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
388420
// Metal buffer based on the host memory pointer
389421
//
390422
static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
391-
//metal_printf("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
423+
//GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
392424

393425
const int64_t tsize = ggml_nbytes(t);
394426

@@ -400,13 +432,13 @@ int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
400432
if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
401433
*offs = (size_t) ioffs;
402434

403-
//metal_printf("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
435+
//GGML_METAL_LOG_INFO("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
404436

405437
return ctx->buffers[i].metal;
406438
}
407439
}
408440

409-
metal_printf("%s: error: buffer is nil\n", __func__);
441+
GGML_METAL_LOG_ERROR("%s: error: buffer is nil\n", __func__);
410442

411443
return nil;
412444
}
@@ -418,7 +450,7 @@ bool ggml_metal_add_buffer(
418450
size_t size,
419451
size_t max_size) {
420452
if (ctx->n_buffers >= GGML_METAL_MAX_BUFFERS) {
421-
metal_printf("%s: too many buffers\n", __func__);
453+
GGML_METAL_LOG_ERROR("%s: error: too many buffers\n", __func__);
422454
return false;
423455
}
424456

@@ -428,7 +460,7 @@ bool ggml_metal_add_buffer(
428460
const int64_t ioffs = (int64_t) data - (int64_t) ctx->buffers[i].data;
429461

430462
if (ioffs >= 0 && ioffs < (int64_t) ctx->buffers[i].size) {
431-
metal_printf("%s: error: buffer '%s' overlaps with '%s'\n", __func__, name, ctx->buffers[i].name);
463+
GGML_METAL_LOG_ERROR("%s: error: buffer '%s' overlaps with '%s'\n", __func__, name, ctx->buffers[i].name);
432464
return false;
433465
}
434466
}
@@ -449,11 +481,11 @@ bool ggml_metal_add_buffer(
449481
ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:data length:size_aligned options:MTLResourceStorageModeShared deallocator:nil];
450482

451483
if (ctx->buffers[ctx->n_buffers].metal == nil) {
452-
metal_printf("%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0);
484+
GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0);
453485
return false;
454486
}
455487

456-
metal_printf("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
488+
GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
457489

458490
++ctx->n_buffers;
459491
} else {
@@ -473,31 +505,31 @@ bool ggml_metal_add_buffer(
473505
ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:(void *) ((uint8_t *) data + i) length:size_step_aligned options:MTLResourceStorageModeShared deallocator:nil];
474506

475507
if (ctx->buffers[ctx->n_buffers].metal == nil) {
476-
metal_printf("%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0);
508+
GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0);
477509
return false;
478510
}
479511

480-
metal_printf("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
512+
GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
481513
if (i + size_step < size) {
482-
metal_printf("\n");
514+
GGML_METAL_LOG_INFO("\n");
483515
}
484516

485517
++ctx->n_buffers;
486518
}
487519
}
488520

489521
#if TARGET_OS_OSX
490-
metal_printf(", (%8.2f / %8.2f)",
522+
GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)",
491523
ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
492524
ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
493525

494526
if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
495-
metal_printf(", warning: current allocated size is greater than the recommended max working set size\n");
527+
GGML_METAL_LOG_WARN(", warning: current allocated size is greater than the recommended max working set size\n", __func__);
496528
} else {
497-
metal_printf("\n");
529+
GGML_METAL_LOG_INFO("\n");
498530
}
499531
#else
500-
metal_printf(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1024.0 / 1024.0);
532+
GGML_METAL_LOG_INFO(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1024.0 / 1024.0);
501533
#endif
502534
}
503535

@@ -610,7 +642,7 @@ void ggml_metal_graph_find_concurrency(
610642
}
611643

612644
if (ctx->concur_list_len > GGML_MAX_CONCUR) {
613-
metal_printf("%s: too many elements for metal ctx->concur_list!\n", __func__);
645+
GGML_METAL_LOG_WARN("%s: too many elements for metal ctx->concur_list!\n", __func__);
614646
}
615647
}
616648

@@ -664,7 +696,7 @@ void ggml_metal_graph_compute(
664696
continue;
665697
}
666698

667-
//metal_printf("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
699+
//GGML_METAL_LOG_INFO("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
668700

669701
struct ggml_tensor * src0 = gf->nodes[i]->src[0];
670702
struct ggml_tensor * src1 = gf->nodes[i]->src[1];
@@ -708,17 +740,17 @@ void ggml_metal_graph_compute(
708740
id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(ctx, src1, &offs_src1) : nil;
709741
id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(ctx, dst, &offs_dst) : nil;
710742

711-
//metal_printf("%s: op - %s\n", __func__, ggml_op_name(dst->op));
743+
//GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
712744
//if (src0) {
713-
// metal_printf("%s: src0 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src0t), ne00, ne01, ne02,
745+
// GGML_METAL_LOG_INFO("%s: src0 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src0t), ne00, ne01, ne02,
714746
// ggml_is_contiguous(src0), src0->name);
715747
//}
716748
//if (src1) {
717-
// metal_printf("%s: src1 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src1t), ne10, ne11, ne12,
749+
// GGML_METAL_LOG_INFO("%s: src1 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src1t), ne10, ne11, ne12,
718750
// ggml_is_contiguous(src1), src1->name);
719751
//}
720752
//if (dst) {
721-
// metal_printf("%s: dst - %4s [%5lld, %5lld, %5lld], 1, %s\n", __func__, ggml_type_name(dstt), ne0, ne1, ne2,
753+
// GGML_METAL_LOG_INFO("%s: dst - %4s [%5lld, %5lld, %5lld], 1, %s\n", __func__, ggml_type_name(dstt), ne0, ne1, ne2,
722754
// dst->name);
723755
//}
724756

@@ -830,7 +862,7 @@ void ggml_metal_graph_compute(
830862
} break;
831863
default:
832864
{
833-
metal_printf("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
865+
GGML_METAL_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
834866
GGML_ASSERT(false);
835867
}
836868
} break;
@@ -1019,7 +1051,7 @@ void ggml_metal_graph_compute(
10191051
} break;
10201052
default:
10211053
{
1022-
metal_printf("Asserting on type %d\n",(int)src0t);
1054+
GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src0t);
10231055
GGML_ASSERT(false && "not implemented");
10241056
}
10251057
};
@@ -1261,7 +1293,7 @@ void ggml_metal_graph_compute(
12611293
} break;
12621294
default:
12631295
{
1264-
metal_printf("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
1296+
GGML_METAL_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
12651297
GGML_ASSERT(false);
12661298
}
12671299
}
@@ -1286,7 +1318,7 @@ void ggml_metal_graph_compute(
12861318

12871319
MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status];
12881320
if (status != MTLCommandBufferStatusCompleted) {
1289-
metal_printf("%s: command buffer %d failed with status %lu\n", __func__, i, status);
1321+
GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
12901322
GGML_ASSERT(false);
12911323
}
12921324
}

ggml.h

+7
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,12 @@ extern "C" {
445445
GGML_OBJECT_WORK_BUFFER
446446
};
447447

448+
enum ggml_log_level {
449+
GGML_LOG_LEVEL_ERROR = 2,
450+
GGML_LOG_LEVEL_WARN = 3,
451+
GGML_LOG_LEVEL_INFO = 4
452+
};
453+
448454
// ggml object
449455
struct ggml_object {
450456
size_t offs;
@@ -1691,6 +1697,7 @@ extern "C" {
16911697
};
16921698

16931699
typedef void (*ggml_opt_callback)(void * data, float * sched);
1700+
typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
16941701

16951702
// optimization parameters
16961703
//

0 commit comments

Comments
 (0)