From c8f1ae52cf612668f28ec8a4710e8205bcaf69bb Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 20 Sep 2025 12:19:49 +0300 Subject: [PATCH 1/3] ggml : extend ggml_can_fuse to work with non-sequential nodes in the graph --- ggml/src/ggml-impl.h | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index 19a7adb2d101b..825f31f601300 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -570,27 +570,27 @@ static inline bool ggml_node_has_n_uses(const struct ggml_cgraph * cgraph, int n return true; } -// Returns true if nodes [i, i+ops.size()) are the sequence of ggml_ops in ops[] +// Returns true if nodes with indices { node_idxs } are the sequence of ggml_ops in ops[] // and are fusable. Nodes are considered fusable according to this function if: // - all nodes except the last have only one use and are not views/outputs (see ggml_node_has_N_uses). // - all nodes except the last are a src of the following node. // - all nodes are the same shape. 
// TODO: Consider allowing GGML_OP_NONE nodes in between -static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, const enum ggml_op * ops, int num_ops) { - if (node_idx + num_ops > cgraph->n_nodes) { - return false; - } - +static inline bool ggml_can_fuse_ext(const struct ggml_cgraph * cgraph, const int * node_idxs, const enum ggml_op * ops, int num_ops) { for (int i = 0; i < num_ops; ++i) { - struct ggml_tensor * node = cgraph->nodes[node_idx + i]; + if (node_idxs[i] + num_ops > cgraph->n_nodes) { + return false; + } + + struct ggml_tensor * node = cgraph->nodes[node_idxs[i]]; if (node->op != ops[i]) { return false; } - if (i < num_ops - 1 && !ggml_node_has_n_uses(cgraph, node_idx + i, 1)) { + if (i < num_ops - 1 && !ggml_node_has_n_uses(cgraph, node_idxs[i], 1)) { return false; } if (i > 0) { - struct ggml_tensor * prev = cgraph->nodes[node_idx + i - 1]; + struct ggml_tensor * prev = cgraph->nodes[node_idxs[i - 1]]; if (node->src[0] != prev && node->src[1] != prev) { return false; } @@ -602,6 +602,18 @@ static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx return true; } +// same as above, for sequential indices starting at node_idx +static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, const enum ggml_op * ops, int num_ops) { + assert(num_ops < 32); + + int idxs[32]; + for (int i = 0; i < num_ops; ++i) { + idxs[i] = node_idx + i; + } + + return ggml_can_fuse_ext(cgraph, idxs, ops, num_ops); +} + #ifdef __cplusplus } #endif @@ -615,6 +627,11 @@ inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, std:: return ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size()); } +inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, std::initializer_list<int> node_idx, std::initializer_list<enum ggml_op> ops) { + assert(node_idx.size() == ops.size()); + return ggml_can_fuse_ext(cgraph, node_idx.begin(), ops.begin(), (int)ops.size()); +} + // expose GGUF internals for test code 
GGML_API size_t gguf_type_size(enum gguf_type type); GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params); From 300a50eb95d1a16e1bf6ed950afb9003570f7258 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 20 Sep 2025 16:57:18 +0300 Subject: [PATCH 2/3] cont : fix wrong bounds check condition --- ggml/src/ggml-impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index 825f31f601300..406a019ccec1f 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -578,7 +578,7 @@ static inline bool ggml_node_has_n_uses(const struct ggml_cgraph * cgraph, int n // TODO: Consider allowing GGML_OP_NONE nodes in between static inline bool ggml_can_fuse_ext(const struct ggml_cgraph * cgraph, const int * node_idxs, const enum ggml_op * ops, int num_ops) { for (int i = 0; i < num_ops; ++i) { - if (node_idxs[i] + num_ops > cgraph->n_nodes) { + if (node_idxs[i] >= cgraph->n_nodes) { return false; } From 714489f022fb30d3d3f86d87e5021d6f6cab46c3 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 21 Sep 2025 17:44:48 +0300 Subject: [PATCH 3/3] cont : remove unnecessary overload --- ggml/src/ggml-impl.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index 406a019ccec1f..4ea39ddde90c4 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -606,6 +606,10 @@ static inline bool ggml_can_fuse_ext(const struct ggml_cgraph * cgraph, const in static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, const enum ggml_op * ops, int num_ops) { assert(num_ops < 32); + if (node_idx + num_ops > cgraph->n_nodes) { + return false; + } + int idxs[32]; for (int i = 0; i < num_ops; ++i) { idxs[i] = node_idx + i; @@ -627,11 +631,6 @@ inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, std:: return ggml_can_fuse(cgraph, node_idx, ops.begin(), 
(int)ops.size()); } -inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, std::initializer_list<int> node_idx, std::initializer_list<enum ggml_op> ops) { - assert(node_idx.size() == ops.size()); - return ggml_can_fuse_ext(cgraph, node_idx.begin(), ops.begin(), (int)ops.size()); -} - // expose GGUF internals for test code GGML_API size_t gguf_type_size(enum gguf_type type); GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);