
graph : fix stack-use-after-return #14960

Merged: 1 commit on Jul 30, 2025
src/llama-graph.h: 15 additions & 12 deletions
@@ -144,7 +144,7 @@ class llm_graph_input_pos_bucket : public llm_graph_input_i {

ggml_tensor * pos_bucket = nullptr; // I32 [n_batch, n_batch]

-    const llama_hparams & hparams;
+    const llama_hparams hparams;
};

class llm_graph_input_pos_bucket_kv : public llm_graph_input_i {
@@ -158,7 +158,7 @@ class llm_graph_input_pos_bucket_kv : public llm_graph_input_i {

ggml_tensor * pos_bucket = nullptr; // I32 [n_kv, n_batch]

-    const llama_hparams & hparams;
+    const llama_hparams hparams;

const llama_kv_cache_unified_context * mctx;
};
@@ -177,8 +177,8 @@ class llm_graph_input_out_ids : public llm_graph_input_i {

ggml_tensor * out_ids; // I32 [n_outputs]

-    const llama_hparams & hparams;
-    const llama_cparams & cparams;
+    const llama_hparams hparams;
+    const llama_cparams cparams;

const uint32_t n_outputs;
};
@@ -192,7 +192,7 @@ class llm_graph_input_mean : public llm_graph_input_i {

ggml_tensor * mean; // F32 [n_batch, n_batch]

-    const llama_cparams & cparams;
+    const llama_cparams cparams;
};

class llm_graph_input_cls : public llm_graph_input_i {
@@ -204,7 +204,7 @@ class llm_graph_input_cls : public llm_graph_input_i {

ggml_tensor * cls; // I32 [n_batch]

-    const llama_cparams & cparams;
+    const llama_cparams cparams;
};

class llm_graph_input_rs : public llm_graph_input_i {
@@ -247,8 +247,8 @@ class llm_graph_input_attn_no_cache : public llm_graph_input_i {
ggml_tensor * kq_mask = nullptr; // F32 [n_tokens, n_batch, 1, 1]
ggml_tensor * kq_mask_cnv = nullptr; // [n_tokens, n_batch, 1, 1]

-    const llama_hparams & hparams;
-    const llama_cparams & cparams;
+    const llama_hparams hparams;
+    const llama_cparams cparams;
};

class llm_graph_input_attn_kv_unified : public llm_graph_input_i {
@@ -278,8 +278,8 @@ class llm_graph_input_attn_kv_unified : public llm_graph_input_i {
ggml_tensor * self_kq_mask = nullptr; // F32 [n_kv, n_batch/n_stream, 1, n_stream]
ggml_tensor * self_kq_mask_cnv = nullptr; // [n_kv, n_batch/n_stream, 1, n_stream]

-    const llama_hparams & hparams;
-    const llama_cparams & cparams;
+    // note: these have to be copies because in order to be able to reuse a graph, its inputs
+    //       need to carry these parameters with them. otherwise, they can point to freed
+    //       llm_graph_params from a previous batch, causing stack-use-after-return
+    const llama_hparams hparams;
+    const llama_cparams cparams;

const llama_kv_cache_unified_context * mctx;
};
@@ -318,8 +321,8 @@ class llm_graph_input_attn_kv_unified_iswa : public llm_graph_input_i {
ggml_tensor * self_kq_mask_swa = nullptr; // F32 [n_kv, n_batch/n_stream, 1, n_stream]
ggml_tensor * self_kq_mask_swa_cnv = nullptr; // [n_kv, n_batch/n_stream, 1, n_stream]

-    const llama_hparams & hparams;
-    const llama_cparams & cparams;
+    const llama_hparams hparams;
+    const llama_cparams cparams;

const llama_kv_cache_unified_iswa_context * mctx;
};
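For context, here is a minimal, self-contained sketch of the lifetime problem described by the note added in the diff. It is not part of the PR: `hparams_t`, `input_ref`, `input_copy`, and `build_graph_input` are hypothetical stand-ins for the llama.cpp types, used only to show why a reference member dangles after the owning stack frame returns while a copied member stays valid when the graph inputs are reused.

```cpp
// Sketch of the bug class this PR fixes (assumed, simplified types; not the
// actual llama.cpp symbols).

#include <cstdint>
#include <memory>

struct hparams_t { uint32_t n_embd = 4096; };   // stand-in for llama_hparams

struct input_ref {                              // old layout: holds a reference
    explicit input_ref(const hparams_t & hp) : hparams(hp) {}
    const hparams_t & hparams;                  // dangles if hp lived on the caller's stack
};

struct input_copy {                             // new layout: holds a copy
    explicit input_copy(const hparams_t & hp) : hparams(hp) {}
    const hparams_t hparams;                    // the input carries its own data
};

std::unique_ptr<input_copy> build_graph_input() {
    hparams_t params;                           // lives only for the duration of this call
    // had the input stored a reference, it would point into this frame after return,
    // i.e. a stack-use-after-return when the graph input is reused for a later batch
    return std::make_unique<input_copy>(params);
}

int main() {
    auto inp = build_graph_input();             // 'params' is gone, but the copy is valid
    return inp->hparams.n_embd == 4096 ? 0 : 1;
}
```

With the previous reference members, reusing cached graph inputs for a new batch could read `hparams`/`cparams` through references into an `llm_graph_params` that had already gone out of scope, which is the stack-use-after-return that sanitizers such as ASan flag; copying the parameter structs into each input removes that lifetime dependency.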