11
11
#define MIN (a, b ) ((a) < (b) ? (a) : (b))
12
12
#define MAX (a, b ) ((a) > (b) ? (a) : (b))
13
13
14
- // TODO: temporary - reuse llama.cpp logging
15
14
// Logging macros for the Metal backend.
// When GGML_METAL_NDEBUG is defined the GGML_METAL_LOG_* macros expand to nothing,
// so their arguments are never evaluated. Otherwise each macro forwards its
// printf-style arguments to ggml_metal_log together with the matching
// GGML_LOG_LEVEL_* severity.
#ifdef GGML_METAL_NDEBUG
16
- #define metal_printf (...)
15
+ #define GGML_METAL_LOG_INFO (...)
16
+ #define GGML_METAL_LOG_WARN (...)
17
+ #define GGML_METAL_LOG_ERROR (...)
17
18
#else
18
- #define metal_printf (...) fprintf(stderr, __VA_ARGS__)
19
+ #define GGML_METAL_LOG_INFO (...) ggml_metal_log(GGML_LOG_LEVEL_INFO, __VA_ARGS__)
20
+ #define GGML_METAL_LOG_WARN (...) ggml_metal_log(GGML_LOG_LEVEL_WARN, __VA_ARGS__)
21
+ #define GGML_METAL_LOG_ERROR (...) ggml_metal_log(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
19
22
#endif
20
23
21
24
#define UNUSED (x ) (void )(x)
@@ -120,8 +123,37 @@ @interface GGMLMetalClass : NSObject
120
123
@implementation GGMLMetalClass
121
124
@end
122
125
126
// Sink that receives every formatted log message; while it is NULL,
// ggml_metal_log returns without formatting or emitting anything.
+ ggml_log_callback ggml_metal_log_callback = NULL ;
127
// Caller-supplied pointer handed back to the callback as its last argument.
+ void * ggml_metal_log_user_data = NULL ;
128
+
129
// Installs the callback that receives Metal backend log messages.
//
// log_callback - function invoked with (level, text, user_data) for each message;
//                passing NULL makes ggml_metal_log a no-op
// user_data    - pointer stored as-is and passed back to log_callback on every call
void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data) {
    // The two stores are independent; the context pointer is recorded first,
    // then the function pointer that will consume it.
    ggml_metal_log_user_data = user_data;
    ggml_metal_log_callback  = log_callback;
}
133
+
134
// Formats a printf-style message and passes it to the registered log callback.
//
// level  - severity, forwarded unchanged to the callback
// format - printf-style format string, followed by its arguments
//
// Does nothing when no callback is registered. Messages shorter than the
// 128-byte stack buffer are formatted in place; longer ones are formatted a
// second time into a heap buffer sized from the first vsnprintf pass.
static void ggml_metal_log(enum ggml_log_level level, const char * format, ...) {
    if (ggml_metal_log_callback == NULL) {
        return;
    }

    va_list args;
    va_start(args, format);

    // vsnprintf leaves `args` indeterminate after it returns, so a second
    // formatting pass must consume its own copy taken beforehand.
    va_list args_copy;
    va_copy(args_copy, args);

    char buffer[128];
    const int len = vsnprintf(buffer, sizeof(buffer), format, args);

    if (len < 0) {
        // Formatting failed: the buffer contents are unspecified, so emit nothing.
    } else if ((size_t) len < sizeof(buffer)) {
        ggml_metal_log_callback(level, buffer, ggml_metal_log_user_data);
    } else {
        const size_t needed = (size_t) len + 1;
        char * buffer2 = malloc(needed);
        if (buffer2 != NULL) {
            vsnprintf(buffer2, needed, format, args_copy);
            buffer2[len] = 0;
            ggml_metal_log_callback(level, buffer2, ggml_metal_log_user_data);
            free(buffer2);
        } else {
            // Out of memory: deliver the truncated, NUL-terminated prefix
            // rather than dropping the message entirely.
            ggml_metal_log_callback(level, buffer, ggml_metal_log_user_data);
        }
    }

    va_end(args_copy);
    va_end(args);
}
152
+
153
+
154
+
123
155
struct ggml_metal_context * ggml_metal_init (int n_cb) {
124
- metal_printf (" %s : allocating\n " , __func__);
156
+ GGML_METAL_LOG_INFO (" %s : allocating\n " , __func__);
125
157
126
158
id <MTLDevice > device;
127
159
NSString * s;
@@ -131,14 +163,14 @@ @implementation GGMLMetalClass
131
163
NSArray * devices = MTLCopyAllDevices ();
132
164
for (device in devices) {
133
165
s = [device name ];
134
- metal_printf (" %s : found device: %s \n " , __func__, [s UTF8String ]);
166
+ GGML_METAL_LOG_INFO (" %s : found device: %s \n " , __func__, [s UTF8String ]);
135
167
}
136
168
#endif
137
169
138
170
// Pick and show default Metal device
139
171
device = MTLCreateSystemDefaultDevice ();
140
172
s = [device name ];
141
- metal_printf (" %s : picking default device: %s \n " , __func__, [s UTF8String ]);
173
+ GGML_METAL_LOG_INFO (" %s : picking default device: %s \n " , __func__, [s UTF8String ]);
142
174
143
175
// Configure context
144
176
struct ggml_metal_context * ctx = malloc (sizeof (struct ggml_metal_context));
@@ -165,7 +197,7 @@ @implementation GGMLMetalClass
165
197
ctx->library = [ctx->device newLibraryWithURL: libURL error: &error];
166
198
167
199
if (error) {
168
- metal_printf (" %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
200
+ GGML_METAL_LOG_ERROR (" %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
169
201
return NULL ;
170
202
}
171
203
}
@@ -179,11 +211,11 @@ @implementation GGMLMetalClass
179
211
// NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"];
180
212
NSBundle * bundle = [NSBundle bundleForClass: [GGMLMetalClass class ]];
181
213
NSString * path = [bundle pathForResource: @" ggml-metal" ofType: @" metal" ];
182
- metal_printf (" %s : loading '%s '\n " , __func__, [path UTF8String ]);
214
+ GGML_METAL_LOG_INFO (" %s : loading '%s '\n " , __func__, [path UTF8String ]);
183
215
184
216
NSString * src = [NSString stringWithContentsOfFile: path encoding: NSUTF8StringEncoding error: &error];
185
217
if (error) {
186
- metal_printf (" %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
218
+ GGML_METAL_LOG_ERROR (" %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
187
219
return NULL ;
188
220
}
189
221
@@ -195,7 +227,7 @@ @implementation GGMLMetalClass
195
227
ctx->library = [ctx->device newLibraryWithSource: src options: nil error: &error];
196
228
#endif
197
229
if (error) {
198
- metal_printf (" %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
230
+ GGML_METAL_LOG_ERROR (" %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
199
231
return NULL ;
200
232
}
201
233
}
@@ -207,11 +239,11 @@ @implementation GGMLMetalClass
207
239
#define GGML_METAL_ADD_KERNEL (name ) \
208
240
ctx->function_ ##name = [ctx->library newFunctionWithName: @" kernel_" #name]; \
209
241
ctx->pipeline_ ##name = [ctx->device newComputePipelineStateWithFunction: ctx->function_##name error: &error]; \
210
- metal_printf (" %s : loaded %-32s %16p | th_max = %4d | th_width = %4d \n " , __func__, " kernel_" #name, (void *) ctx->pipeline_ ##name, \
242
+ GGML_METAL_LOG_INFO (" %s : loaded %-32s %16p | th_max = %4d | th_width = %4d \n " , __func__, " kernel_" #name, (void *) ctx->pipeline_ ##name, \
211
243
(int ) ctx->pipeline_ ##name.maxTotalThreadsPerThreadgroup , \
212
244
(int ) ctx->pipeline_ ##name.threadExecutionWidth ); \
213
245
if (error) { \
214
- metal_printf (" %s : load pipeline error: %s \n " , __func__, [[error description ] UTF8String ]); \
246
+ GGML_METAL_LOG_ERROR (" %s : error : load pipeline error: %s \n " , __func__, [[error description ] UTF8String ]); \
215
247
return NULL ; \
216
248
}
217
249
@@ -270,21 +302,21 @@ @implementation GGMLMetalClass
270
302
#undef GGML_METAL_ADD_KERNEL
271
303
}
272
304
273
- metal_printf (" %s : hasUnifiedMemory = %s \n " , __func__, ctx->device .hasUnifiedMemory ? " true" : " false" );
305
+ GGML_METAL_LOG_INFO (" %s : hasUnifiedMemory = %s \n " , __func__, ctx->device .hasUnifiedMemory ? " true" : " false" );
274
306
#if TARGET_OS_OSX
275
- metal_printf (" %s : recommendedMaxWorkingSetSize = %8.2f MB\n " , __func__, ctx->device .recommendedMaxWorkingSetSize / 1024.0 / 1024.0 );
307
+ GGML_METAL_LOG_INFO (" %s : recommendedMaxWorkingSetSize = %8.2f MB\n " , __func__, ctx->device .recommendedMaxWorkingSetSize / 1024.0 / 1024.0 );
276
308
if (ctx->device .maxTransferRate != 0 ) {
277
- metal_printf (" %s : maxTransferRate = %8.2f MB/s\n " , __func__, ctx->device .maxTransferRate / 1024.0 / 1024.0 );
309
+ GGML_METAL_LOG_INFO (" %s : maxTransferRate = %8.2f MB/s\n " , __func__, ctx->device .maxTransferRate / 1024.0 / 1024.0 );
278
310
} else {
279
- metal_printf (" %s : maxTransferRate = built-in GPU\n " , __func__);
311
+ GGML_METAL_LOG_INFO (" %s : maxTransferRate = built-in GPU\n " , __func__);
280
312
}
281
313
#endif
282
314
283
315
return ctx;
284
316
}
285
317
286
318
void ggml_metal_free (struct ggml_metal_context * ctx) {
287
- metal_printf (" %s : deallocating\n " , __func__);
319
+ GGML_METAL_LOG_INFO (" %s : deallocating\n " , __func__);
288
320
#define GGML_METAL_DEL_KERNEL (name ) \
289
321
[ctx->function_##name release ]; \
290
322
[ctx->pipeline_##name release ];
@@ -360,7 +392,7 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
360
392
void * data = NULL ;
361
393
const int result = posix_memalign ((void **) &data, sysconf (_SC_PAGESIZE), n);
362
394
if (result != 0 ) {
363
- metal_printf (" %s : error: posix_memalign failed\n " , __func__);
395
+ GGML_METAL_LOG_ERROR (" %s : error: posix_memalign failed\n " , __func__);
364
396
return NULL ;
365
397
}
366
398
@@ -388,7 +420,7 @@ int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
388
420
// Metal buffer based on the host memory pointer
389
421
//
390
422
static id <MTLBuffer > ggml_metal_get_buffer (struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
391
- // metal_printf ("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
423
+ // GGML_METAL_LOG_INFO ("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
392
424
393
425
const int64_t tsize = ggml_nbytes (t);
394
426
@@ -400,13 +432,13 @@ int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
400
432
if (ioffs >= 0 && ioffs + tsize <= (int64_t ) ctx->buffers [i].size ) {
401
433
*offs = (size_t ) ioffs;
402
434
403
- // metal_printf ("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
435
+ // GGML_METAL_LOG_INFO ("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
404
436
405
437
return ctx->buffers [i].metal ;
406
438
}
407
439
}
408
440
409
- metal_printf (" %s : error: buffer is nil\n " , __func__);
441
+ GGML_METAL_LOG_ERROR (" %s : error: buffer is nil\n " , __func__);
410
442
411
443
return nil ;
412
444
}
@@ -418,7 +450,7 @@ bool ggml_metal_add_buffer(
418
450
size_t size,
419
451
size_t max_size) {
420
452
if (ctx->n_buffers >= GGML_METAL_MAX_BUFFERS) {
421
- metal_printf (" %s : too many buffers\n " , __func__);
453
+ GGML_METAL_LOG_ERROR (" %s : error : too many buffers\n " , __func__);
422
454
return false ;
423
455
}
424
456
@@ -428,7 +460,7 @@ bool ggml_metal_add_buffer(
428
460
const int64_t ioffs = (int64_t ) data - (int64_t ) ctx->buffers [i].data ;
429
461
430
462
if (ioffs >= 0 && ioffs < (int64_t ) ctx->buffers [i].size ) {
431
- metal_printf (" %s : error: buffer '%s ' overlaps with '%s '\n " , __func__, name, ctx->buffers [i].name );
463
+ GGML_METAL_LOG_ERROR (" %s : error: buffer '%s ' overlaps with '%s '\n " , __func__, name, ctx->buffers [i].name );
432
464
return false ;
433
465
}
434
466
}
@@ -449,11 +481,11 @@ bool ggml_metal_add_buffer(
449
481
ctx->buffers [ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy: data length: size_aligned options: MTLResourceStorageModeShared deallocator: nil ];
450
482
451
483
if (ctx->buffers [ctx->n_buffers].metal == nil ) {
452
- metal_printf (" %s : failed to allocate '%-16s ' buffer, size = %8.2f MB\n " , __func__, name, size_aligned / 1024.0 / 1024.0 );
484
+ GGML_METAL_LOG_ERROR (" %s : error : failed to allocate '%-16s ' buffer, size = %8.2f MB\n " , __func__, name, size_aligned / 1024.0 / 1024.0 );
453
485
return false ;
454
486
}
455
487
456
- metal_printf (" %s : allocated '%-16s ' buffer, size = %8.2f MB" , __func__, name, size_aligned / 1024.0 / 1024.0 );
488
+ GGML_METAL_LOG_INFO (" %s : allocated '%-16s ' buffer, size = %8.2f MB" , __func__, name, size_aligned / 1024.0 / 1024.0 );
457
489
458
490
++ctx->n_buffers ;
459
491
} else {
@@ -473,31 +505,31 @@ bool ggml_metal_add_buffer(
473
505
ctx->buffers [ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy: (void *) ((uint8_t *) data + i) length: size_step_aligned options: MTLResourceStorageModeShared deallocator: nil ];
474
506
475
507
if (ctx->buffers [ctx->n_buffers].metal == nil ) {
476
- metal_printf (" %s : failed to allocate '%-16s ' buffer, size = %8.2f MB\n " , __func__, name, size_step_aligned / 1024.0 / 1024.0 );
508
+ GGML_METAL_LOG_ERROR (" %s : error : failed to allocate '%-16s ' buffer, size = %8.2f MB\n " , __func__, name, size_step_aligned / 1024.0 / 1024.0 );
477
509
return false ;
478
510
}
479
511
480
- metal_printf (" %s : allocated '%-16s ' buffer, size = %8.2f MB, offs = %12ld " , __func__, name, size_step_aligned / 1024.0 / 1024.0 , i);
512
+ GGML_METAL_LOG_INFO (" %s : allocated '%-16s ' buffer, size = %8.2f MB, offs = %12ld " , __func__, name, size_step_aligned / 1024.0 / 1024.0 , i);
481
513
if (i + size_step < size) {
482
- metal_printf (" \n " );
514
+ GGML_METAL_LOG_INFO (" \n " );
483
515
}
484
516
485
517
++ctx->n_buffers ;
486
518
}
487
519
}
488
520
489
521
#if TARGET_OS_OSX
490
- metal_printf (" , (%8.2f / %8.2f )" ,
522
+ GGML_METAL_LOG_INFO (" , (%8.2f / %8.2f )" ,
491
523
ctx->device .currentAllocatedSize / 1024.0 / 1024.0 ,
492
524
ctx->device .recommendedMaxWorkingSetSize / 1024.0 / 1024.0 );
493
525
494
526
if (ctx->device .currentAllocatedSize > ctx->device .recommendedMaxWorkingSetSize ) {
495
- metal_printf (" , warning: current allocated size is greater than the recommended max working set size\n " );
527
+ GGML_METAL_LOG_WARN (" , warning: current allocated size is greater than the recommended max working set size\n " , __func__ );
496
528
} else {
497
- metal_printf (" \n " );
529
+ GGML_METAL_LOG_INFO (" \n " );
498
530
}
499
531
#else
500
- metal_printf (" , (%8.2f )\n " , ctx->device .currentAllocatedSize / 1024.0 / 1024.0 );
532
+ GGML_METAL_LOG_INFO (" , (%8.2f )\n " , ctx->device .currentAllocatedSize / 1024.0 / 1024.0 );
501
533
#endif
502
534
}
503
535
@@ -610,7 +642,7 @@ void ggml_metal_graph_find_concurrency(
610
642
}
611
643
612
644
if (ctx->concur_list_len > GGML_MAX_CONCUR) {
613
- metal_printf (" %s : too many elements for metal ctx->concur_list!\n " , __func__);
645
+ GGML_METAL_LOG_WARN (" %s : too many elements for metal ctx->concur_list!\n " , __func__);
614
646
}
615
647
}
616
648
@@ -664,7 +696,7 @@ void ggml_metal_graph_compute(
664
696
continue ;
665
697
}
666
698
667
- // metal_printf ("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
699
+ // GGML_METAL_LOG_INFO ("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
668
700
669
701
struct ggml_tensor * src0 = gf->nodes [i]->src [0 ];
670
702
struct ggml_tensor * src1 = gf->nodes [i]->src [1 ];
@@ -708,17 +740,17 @@ void ggml_metal_graph_compute(
708
740
id <MTLBuffer > id_src1 = src1 ? ggml_metal_get_buffer (ctx, src1, &offs_src1) : nil ;
709
741
id <MTLBuffer > id_dst = dst ? ggml_metal_get_buffer (ctx, dst, &offs_dst) : nil ;
710
742
711
- // metal_printf ("%s: op - %s\n", __func__, ggml_op_name(dst->op));
743
+ // GGML_METAL_LOG_INFO ("%s: op - %s\n", __func__, ggml_op_name(dst->op));
712
744
// if (src0) {
713
- // metal_printf ("%s: src0 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src0t), ne00, ne01, ne02,
745
+ // GGML_METAL_LOG_INFO ("%s: src0 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src0t), ne00, ne01, ne02,
714
746
// ggml_is_contiguous(src0), src0->name);
715
747
// }
716
748
// if (src1) {
717
- // metal_printf ("%s: src1 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src1t), ne10, ne11, ne12,
749
+ // GGML_METAL_LOG_INFO ("%s: src1 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src1t), ne10, ne11, ne12,
718
750
// ggml_is_contiguous(src1), src1->name);
719
751
// }
720
752
// if (dst) {
721
- // metal_printf ("%s: dst - %4s [%5lld, %5lld, %5lld], 1, %s\n", __func__, ggml_type_name(dstt), ne0, ne1, ne2,
753
+ // GGML_METAL_LOG_INFO ("%s: dst - %4s [%5lld, %5lld, %5lld], 1, %s\n", __func__, ggml_type_name(dstt), ne0, ne1, ne2,
722
754
// dst->name);
723
755
// }
724
756
@@ -830,7 +862,7 @@ void ggml_metal_graph_compute(
830
862
} break ;
831
863
default :
832
864
{
833
- metal_printf (" %s : node %3d , op = %8s not implemented\n " , __func__, i, ggml_op_name (dst->op ));
865
+ GGML_METAL_LOG_WARN (" %s : node %3d , op = %8s not implemented\n " , __func__, i, ggml_op_name (dst->op ));
834
866
GGML_ASSERT (false );
835
867
}
836
868
} break ;
@@ -1019,7 +1051,7 @@ void ggml_metal_graph_compute(
1019
1051
} break ;
1020
1052
default :
1021
1053
{
1022
- metal_printf (" Asserting on type %d \n " ,(int )src0t);
1054
+ GGML_METAL_LOG_ERROR (" Asserting on type %d \n " , (int )src0t);
1023
1055
GGML_ASSERT (false && " not implemented" );
1024
1056
}
1025
1057
};
@@ -1261,7 +1293,7 @@ void ggml_metal_graph_compute(
1261
1293
} break ;
1262
1294
default :
1263
1295
{
1264
- metal_printf (" %s : node %3d , op = %8s not implemented\n " , __func__, i, ggml_op_name (dst->op ));
1296
+ GGML_METAL_LOG_ERROR (" %s : error : node %3d , op = %8s not implemented\n " , __func__, i, ggml_op_name (dst->op ));
1265
1297
GGML_ASSERT (false );
1266
1298
}
1267
1299
}
@@ -1286,7 +1318,7 @@ void ggml_metal_graph_compute(
1286
1318
1287
1319
MTLCommandBufferStatus status = (MTLCommandBufferStatus ) [ctx->command_buffers[i] status ];
1288
1320
if (status != MTLCommandBufferStatusCompleted ) {
1289
- metal_printf (" %s : command buffer %d failed with status %lu \n " , __func__, i, status);
1321
+ GGML_METAL_LOG_INFO (" %s : command buffer %d failed with status %lu \n " , __func__, i, status);
1290
1322
GGML_ASSERT (false );
1291
1323
}
1292
1324
}
0 commit comments