
Commit 5484737

llama : fix tensor name grepping during quantization
ggml-ci
1 parent 57eaadb

File tree: 1 file changed

llama.cpp: 3 additions & 1 deletion
@@ -3432,6 +3432,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 
         const std::string name = ggml_get_name(meta);
 
+        // TODO: avoid hardcoded tensor names - use the TN_* constants
         if (name.find("attn_v.weight") != std::string::npos) {
             ++n_attention_wv;
         }
@@ -3510,6 +3511,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         } else {
             new_type = quantized_type;
 #ifdef GGML_USE_K_QUANTS
+            // TODO: avoid hardcoded tensor names - use the TN_* constants
             if (name == TN_OUTPUT) {
                 int nx = tensor->ne[0];
                 int ny = tensor->ne[1];
@@ -3524,7 +3526,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             else if (QK_K == 64 && (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S) &&
                      (i_attention_wv < n_attention_wv/8 || i_attention_wv >= 7*n_attention_wv/8)) new_type = GGML_TYPE_Q6_K;
             ++i_attention_wv;
-        } else if (name.find("feed_forward.w2.weight") != std::string::npos) {
+        } else if (name.find("ffn_down.weight") != std::string::npos) {
             if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q4_K;
             else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
             else if ((ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) &&
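Context for the fix: the quantizer picks per-tensor override types by substring-matching tensor names, and under the GGUF naming scheme the feed-forward down-projection is named along the lines of "blk.N.ffn_down.weight" rather than the older "layers.N.feed_forward.w2.weight", so the old pattern never matched and the override was silently skipped. Below is a minimal, self-contained sketch of the matching idiom; the tensor names follow the GGUF convention, but the override logic is illustrative only, not the actual llama.cpp quantization policy.

// name_match.cpp - illustrative sketch; compile with: g++ -std=c++11 name_match.cpp
#include <iostream>
#include <string>
#include <vector>

int main() {
    // GGUF-style names carry a "blk.<N>." prefix, so matching is done by
    // substring rather than by full-string comparison.
    const std::vector<std::string> names = {
        "blk.0.attn_v.weight",
        "blk.0.ffn_down.weight",
        "layers.0.feed_forward.w2.weight", // pre-GGUF name: no longer produced
    };

    for (const std::string & name : names) {
        // std::string::find returns std::string::npos when the pattern is
        // absent; any block index matches because only the suffix is tested.
        if (name.find("ffn_down.weight") != std::string::npos) {
            std::cout << name << " -> k-quant override applies\n";
        } else {
            std::cout << name << " -> default quantized type\n";
        }
    }
    return 0;
}

As the in-diff TODO notes, deriving these patterns from the TN_* name constants instead of hardcoding the strings would prevent this class of mismatch from recurring.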
