From fde165e9685574597cfa6d11fed7f2d975373572 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 8 Aug 2024 14:02:14 +0200 Subject: [PATCH 1/3] default n_swa for phi-3 --- src/llama.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/llama.cpp b/src/llama.cpp index a7b1c9ebd9e37..33223d73a1f30 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -4892,7 +4892,6 @@ static void llm_load_hparams( } break; case LLM_ARCH_PHI3: { - ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa); ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); switch (hparams.n_layer) { @@ -4901,6 +4900,17 @@ static void llm_load_hparams( case 40: model.type = e_model::MODEL_14B; break; default: model.type = e_model::MODEL_UNKNOWN; } + + if ((hparams.n_layer == 32 || hparams.n_layer == 40)) { + if (hparams.n_ctx_train == 4096) { + // default value for Phi-3-mini-4k-instruct and Phi-3-medium-4k-instruct + hparams.n_swa = 2047; + } else if (hparams.n_ctx_train == 131072) { + // default value for Phi-3-mini-128k-instruct and Phi-3-medium-128k-instruct + hparams.n_swa = 131072; + } + } + ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa, false); } break; case LLM_ARCH_PLAMO: { From c8aeea500fb28f76ffc5e66cc8d2b7006bd81653 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 8 Aug 2024 14:21:40 +0200 Subject: [PATCH 2/3] fix --- src/llama.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/llama.cpp b/src/llama.cpp index 33223d73a1f30..390c6bf621608 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -4901,14 +4901,16 @@ static void llm_load_hparams( default: model.type = e_model::MODEL_UNKNOWN; } - if ((hparams.n_layer == 32 || hparams.n_layer == 40)) { - if (hparams.n_ctx_train == 4096) { - // default value for Phi-3-mini-4k-instruct and Phi-3-medium-4k-instruct - hparams.n_swa = 2047; - } else if (hparams.n_ctx_train == 131072) { - // default value for Phi-3-mini-128k-instruct and Phi-3-medium-128k-instruct - hparams.n_swa = 131072; - } + // for backward compatibility ; see: https://github.com/ggerganov/llama.cpp/pull/8931 + if ((hparams.n_layer == 32 || hparams.n_layer == 40) && hparams.n_ctx_train == 4096) { + // default value for Phi-3-mini-4k-instruct and Phi-3-medium-4k-instruct + hparams.n_swa = 2047; + } else if (hparams.n_layer == 32 && hparams.n_head_kv(0) == 32 && hparams.n_ctx_train == 131072) { + // default value for Phi-3-mini-128k-instruct + hparams.n_swa = 262144; + } else if (hparams.n_layer == 40 && hparams.n_ctx_train == 131072) { + // default value for Phi-3-medium-128k-instruct + hparams.n_swa = 131072; } ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa, false); } break; From 2913e5ff1ef2c0c9a2cc7cc886f24f2dfaeda281 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Sat, 10 Aug 2024 11:24:50 +0200 Subject: [PATCH 3/3] double check swa --- src/llama.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/llama.cpp b/src/llama.cpp index 390c6bf621608..b236bf0406624 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -4912,7 +4912,10 @@ static void llm_load_hparams( // default value for Phi-3-medium-128k-instruct hparams.n_swa = 131072; } - ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa, false); + bool found_swa = ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa, false); + if (!found_swa && hparams.n_swa == 0) { + throw std::runtime_error("invalid value for sliding_window"); + } } break; case LLM_ARCH_PLAMO: {