Skip to content

Commit c43c2da

Browse files
committed
llm : fix llm_build_kqv taking unused tensor (benign, #3837)
1 parent 523e49b commit c43c2da

File tree

1 file changed

+9
-10
lines changed

1 file changed

+9
-10
lines changed

llama.cpp

+9 -10
Original file line number | Diff line number | Diff line change
@@ -3345,7 +3345,6 @@ static struct ggml_tensor * llm_build_ffn(
33453345
// if max_alibi_bias > 0 then apply ALiBi
33463346
static struct ggml_tensor * llm_build_kqv(
33473347
struct ggml_context * ctx,
3348-
struct ggml_tensor * cur,
33493348
const llama_hparams & hparams,
33503349
const llama_kv_cache & kv,
33513350
struct ggml_tensor * wo,
@@ -3411,7 +3410,7 @@ static struct ggml_tensor * llm_build_kqv(
34113410
struct ggml_tensor * kqv_merged = ggml_permute(ctx, kqv, 0, 2, 1, 3);
34123411
cb(kqv_merged, "kqv_merged", il);
34133412

3414-
cur = ggml_cont_2d(ctx, kqv_merged, n_embd, n_tokens);
3413+
struct ggml_tensor * cur = ggml_cont_2d(ctx, kqv_merged, n_embd, n_tokens);
34153414
cb(cur, "kqv_merged_cont", il);
34163415

34173416
cur = ggml_mul_mat(ctx, wo, cur);
@@ -3565,7 +3564,7 @@ struct llm_build_context {
35653564

35663565
llm_build_kv_store(ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
35673566

3568-
cur = llm_build_kqv(ctx0, cur, hparams, kv_self,
3567+
cur = llm_build_kqv(ctx0, hparams, kv_self,
35693568
model.layers[il].wo, NULL,
35703569
Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, -1.0f, cb, il);
35713570
cb(cur, "kqv_out", il);
@@ -3677,7 +3676,7 @@ struct llm_build_context {
36773676
// apply ALiBi for 13B model
36783677
const float max_alibi_bias = model.type == MODEL_13B ? 8.0f : -1.0f;
36793678

3680-
cur = llm_build_kqv(ctx0, cur, hparams, kv_self,
3679+
cur = llm_build_kqv(ctx0, hparams, kv_self,
36813680
model.layers[il].wo, NULL,
36823681
Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, max_alibi_bias, cb, il);
36833682
cb(cur, "kqv_out", il);
@@ -3795,7 +3794,7 @@ struct llm_build_context {
37953794

37963795
llm_build_kv_store(ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
37973796

3798-
cur = llm_build_kqv(ctx0, attn_norm, hparams, kv_self,
3797+
cur = llm_build_kqv(ctx0, hparams, kv_self,
37993798
model.layers[il].wo, NULL,
38003799
Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, -1.0f, cb, il);
38013800
cb(cur, "kqv_out", il);
@@ -3895,7 +3894,7 @@ struct llm_build_context {
38953894

38963895
llm_build_kv_store(ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
38973896

3898-
cur = llm_build_kqv(ctx0, cur, hparams, kv_self,
3897+
cur = llm_build_kqv(ctx0, hparams, kv_self,
38993898
model.layers[il].wo, model.layers[il].bo,
39003899
Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, -1.0f, cb, il);
39013900
cb(cur, "kqv_out", il);
@@ -4100,7 +4099,7 @@ struct llm_build_context {
41004099
llm_build_kv_store(ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
41014100

41024101
// TODO: not tested, could be broken
4103-
cur = llm_build_kqv(ctx0, Q, hparams, kv_self,
4102+
cur = llm_build_kqv(ctx0, hparams, kv_self,
41044103
model.layers[il].wo, model.layers[il].bo,
41054104
Q, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, -1.0f, cb, il);
41064105
cb(cur, "kqv_out", il);
@@ -4191,7 +4190,7 @@ struct llm_build_context {
41914190

41924191
llm_build_kv_store(ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
41934192

4194-
cur = llm_build_kqv(ctx0, Qcur, hparams, kv_self,
4193+
cur = llm_build_kqv(ctx0, hparams, kv_self,
41954194
model.layers[il].wo, NULL,
41964195
Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, 8.0f, cb, il);
41974196
cb(cur, "kqv_out", il);
@@ -4288,7 +4287,7 @@ struct llm_build_context {
42884287

42894288
llm_build_kv_store(ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
42904289

4291-
cur = llm_build_kqv(ctx0, Qcur, hparams, kv_self,
4290+
cur = llm_build_kqv(ctx0, hparams, kv_self,
42924291
model.layers[il].wo, model.layers[il].bo,
42934292
Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, 8.0f, cb, il);
42944293
cb(cur, "kqv_out", il);
@@ -4382,7 +4381,7 @@ struct llm_build_context {
43824381

43834382
llm_build_kv_store(ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
43844383

4385-
cur = llm_build_kqv(ctx0, Qcur, hparams, kv_self,
4384+
cur = llm_build_kqv(ctx0, hparams, kv_self,
43864385
model.layers[il].wo, NULL,
43874386
Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, hparams.f_max_alibi_bias, cb, il);
43884387
cb(cur, "kqv_out", il);

0 commit comments

Comments
 (0)