Skip to content

Commit b1ca8f3

Browse files
authored
ggml : disable GGML_TASK_INIT and GGML_TASK_FINALIZE by default (#1995)
Will not be scheduled unless explicitly enabled.
1 parent b8c8dda commit b1ca8f3

File tree

2 files changed

+55
-9
lines changed

2 files changed

+55
-9
lines changed

ggml.c

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3846,6 +3846,40 @@ static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
38463846
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
38473847
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
38483848

3849+
// WARN:
3850+
// Misconfiguration can lead to problems that are hard to reason about:
3851+
// * At best it crashes or talks nonsense.
3852+
// * At worst its output differs only slightly, which is hard to perceive.
3853+
//
3854+
// An op has to enable INIT or FINALIZE when any of its branches needs that pass.
3855+
// Pay attention to compile options (e.g., GGML_USE_xxx).
3856+
static bool GGML_OP_HAS_INIT [GGML_OP_COUNT] = { 0 };
3857+
static bool GGML_OP_HAS_FINALIZE[GGML_OP_COUNT] = { 0 };
3858+
static void ggml_setup_op_has_task_pass(void) {
3859+
{ // INIT
3860+
bool * I = GGML_OP_HAS_INIT;
3861+
3862+
I[GGML_OP_ACC ] = true;
3863+
I[GGML_OP_MUL_MAT ] = true;
3864+
I[GGML_OP_OUT_PROD ] = true;
3865+
I[GGML_OP_SET ] = true;
3866+
I[GGML_OP_GET_ROWS_BACK ] = true;
3867+
I[GGML_OP_DIAG_MASK_INF ] = true;
3868+
I[GGML_OP_DIAG_MASK_ZERO ] = true;
3869+
I[GGML_OP_CONV_1D_S1_PH ] = true;
3870+
I[GGML_OP_CONV_1D_S2_PH ] = true;
3871+
I[GGML_OP_CONV_2D_SK_P0 ] = true;
3872+
I[GGML_OP_FLASH_ATTN_BACK ] = true;
3873+
I[GGML_OP_CROSS_ENTROPY_LOSS ] = true;
3874+
}
3875+
3876+
{ // FINALIZE
3877+
bool * F = GGML_OP_HAS_FINALIZE;
3878+
3879+
F[GGML_OP_CROSS_ENTROPY_LOSS ] = true;
3880+
}
3881+
}
3882+
38493883
//
38503884
// ggml context
38513885
//
@@ -4267,6 +4301,8 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
42674301
ggml_cl_init();
42684302
#endif
42694303

4304+
ggml_setup_op_has_task_pass();
4305+
42704306
is_first_call = false;
42714307
}
42724308

@@ -16791,9 +16827,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
1679116827
if (node_n != -1) {
1679216828
/* FINALIZE */
1679316829
struct ggml_tensor * node = state->shared->cgraph->nodes[node_n];
16794-
params.nth = node->n_tasks;
16795-
ggml_compute_forward(&params, node);
16796-
ggml_graph_compute_perf_stats_node(node, state->shared);
16830+
if (GGML_OP_HAS_FINALIZE[node->op]) {
16831+
params.nth = node->n_tasks;
16832+
ggml_compute_forward(&params, node);
16833+
ggml_graph_compute_perf_stats_node(node, state->shared);
16834+
}
1679716835
}
1679816836

1679916837
// distribute new work or execute it direct if 1T
@@ -16805,20 +16843,25 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
1680516843
state->shared->perf_node_start_cycles = ggml_perf_cycles();
1680616844
state->shared->perf_node_start_time_us = ggml_perf_time_us();
1680716845

16846+
params.nth = node->n_tasks;
16847+
1680816848
/* INIT */
16809-
params.type = GGML_TASK_INIT;
16810-
params.nth = node->n_tasks;
16811-
ggml_compute_forward(&params, node);
16849+
if (GGML_OP_HAS_INIT[node->op]) {
16850+
params.type = GGML_TASK_INIT;
16851+
ggml_compute_forward(&params, node);
16852+
}
1681216853

1681316854
if (node->n_tasks == 1) {
1681416855
// TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1,
1681516856
// they do something more efficient than spinning (?)
1681616857
params.type = GGML_TASK_COMPUTE;
1681716858
ggml_compute_forward(&params, node);
1681816859

16819-
params.type = GGML_TASK_FINALIZE;
16820-
ggml_compute_forward(&params, node);
16821-
ggml_graph_compute_perf_stats_node(node, state->shared);
16860+
if (GGML_OP_HAS_FINALIZE[node->op]) {
16861+
params.type = GGML_TASK_FINALIZE;
16862+
ggml_compute_forward(&params, node);
16863+
ggml_graph_compute_perf_stats_node(node, state->shared);
16864+
}
1682216865
} else {
1682316866
break;
1682416867
}

ggml.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,9 @@ extern "C" {
444444

445445

446446
// compute types
447+
448+
// NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
449+
// This behavior changed in https://github.com/ggerganov/llama.cpp/pull/1995.
447450
enum ggml_task_type {
448451
GGML_TASK_INIT = 0,
449452
GGML_TASK_COMPUTE,

0 commit comments

Comments
 (0)