@@ -6172,9 +6172,9 @@ static bool llm_load_tensors(
                         layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, n_ff});
 
                         // optional MLP bias
-                        layer.ffn_gate_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "bias", i), {n_ff},   llama_model_loader::TENSOR_NOT_REQUIRED);
-                        layer.ffn_down_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
-                        layer.ffn_up_b   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "bias", i), {n_ff},   llama_model_loader::TENSOR_NOT_REQUIRED);
+                        layer.ffn_gate_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE, "bias", i), {n_ff},   llama_model_loader::TENSOR_NOT_REQUIRED);
+                        layer.ffn_down_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                        layer.ffn_up_b   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP,   "bias", i), {n_ff},   llama_model_loader::TENSOR_NOT_REQUIRED);
                     } else {
                         layer.ffn_gate_inp = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert});
 
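For context: in llm_load_tensors, ctx_split resolves to the layer's matrix buffer type, which under row-split multi-GPU offloading can place a tensor in a buffer sliced across devices. Only 2-D weight matrices can be sliced that way, so the 1-D bias vectors have to be allocated through ctx_layer instead, which is what this hunk changes. A minimal sketch of that rank-based rule follows; ctx_for_shape is a hypothetical helper for illustration, not a llama.cpp API:

    // Minimal sketch, assuming only that ctx_layer holds whole-tensor
    // allocations and ctx_split holds row-splittable matrix allocations.
    // ctx_for_shape is a hypothetical helper, not part of llama.cpp.
    #include <cstdint>
    #include <initializer_list>

    struct ggml_context; // opaque, as in ggml

    static ggml_context * ctx_for_shape(ggml_context * ctx_layer,
                                        ggml_context * ctx_split,
                                        std::initializer_list<int64_t> ne) {
        // count non-trivial dimensions: a bias is {n}, a weight is {n, m}
        int n_dims = 0;
        for (int64_t d : ne) {
            if (d > 1) {
                n_dims++;
            }
        }
        // only true matrices may live in the split (row-parallel) context;
        // 1-D tensors such as biases stay on the layer's regular buffer
        return n_dims >= 2 ? ctx_split : ctx_layer;
    }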
@@ -6498,7 +6498,7 @@ static bool llm_load_tensors(
                         layer.bv = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa});
 
                         layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}); //output_dens
-                        layer.bo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}); //output_dens
+                        layer.bo = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}); //output_dens
 
                         layer.attn_out_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd}); //output_norm
                         layer.attn_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "bias", i), {n_embd});
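The same rule covers the attention output bias in this hunk: wo is a {n_embd, n_embd} matrix and stays in ctx_split, while bo is a {n_embd} vector and moves to ctx_layer. With the hypothetical ctx_for_shape helper from the sketch above, the choice could be made at the call site, e.g.:

    // Hypothetical usage of ctx_for_shape from the sketch above; ml, tn(),
    // layer, and i are the surrounding llm_load_tensors locals.
    layer.bo = ml.create_tensor(ctx_for_shape(ctx_layer, ctx_split, {n_embd}),
                                tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd});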