llama.cpp — 1 file changed: 3 additions, 1 deletion
@@ -3432,6 +3432,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 
         const std::string name = ggml_get_name(meta);
 
+        // TODO: avoid hardcoded tensor names - use the TN_* constants
         if (name.find("attn_v.weight") != std::string::npos) {
             ++n_attention_wv;
         }
@@ -3510,6 +3511,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         } else {
            new_type = quantized_type;
 #ifdef GGML_USE_K_QUANTS
+            // TODO: avoid hardcoded tensor names - use the TN_* constants
             if (name == TN_OUTPUT) {
                 int nx = tensor->ne[0];
                 int ny = tensor->ne[1];
@@ -3524,7 +3526,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             else if (QK_K == 64 && (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S) &&
                     (i_attention_wv < n_attention_wv/8 || i_attention_wv >= 7*n_attention_wv/8)) new_type = GGML_TYPE_Q6_K;
             ++i_attention_wv;
-        } else if (name.find("feed_forward.w2.weight") != std::string::npos) {
+        } else if (name.find("ffn_down.weight") != std::string::npos) {
             if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q4_K;
             else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
             else if ((ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) &&
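
The two added TODO lines point at the same cleanup: the quantization heuristics match tensors by hardcoded name strings. A minimal sketch of the constant-based form the TODO asks for, assuming TN_ATTN_V and TN_FFN_DOWN exist alongside TN_OUTPUT (only TN_OUTPUT is confirmed by this diff; the other names and their values are hypothetical):

#include <string>

// Hypothetical TN_* constants: TN_OUTPUT is the only one visible in this
// diff, so the names and values below are assumptions for illustration.
#define TN_ATTN_V   "attn_v.weight"
#define TN_FFN_DOWN "ffn_down.weight"

// The substring matches from the diff, expressed through the constants
// instead of raw string literals scattered across the function:
static bool is_attn_v(const std::string & name) {
    return name.find(TN_ATTN_V) != std::string::npos;
}
static bool is_ffn_down(const std::string & name) {
    return name.find(TN_FFN_DOWN) != std::string::npos;
}

Centralizing the names this way means a future rename (like feed_forward.w2 becoming ffn_down in this very commit) touches one definition rather than every call site.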
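
For reference, the QK_K == 64 branch in the last hunk promotes the first and last eighth of the attn_v tensors to Q6_K. A standalone sketch of that selection rule, using an illustrative 32-layer count that is not taken from this diff:

#include <cstdio>

// Reproduces the selection condition from the diff: a tensor index i is
// promoted when i < n/8 or i >= 7*n/8 (integer division).
int main() {
    const int n_attention_wv = 32; // illustrative count, not from the diff
    for (int i = 0; i < n_attention_wv; ++i) {
        if (i < n_attention_wv/8 || i >= 7*n_attention_wv/8) {
            printf("attn_v tensor %2d -> GGML_TYPE_Q6_K\n", i);
        }
    }
    return 0; // with n = 32 this prints indices 0..3 and 28..31
}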