
Commit 1a51b36

Author: fmz
Commit message: cleanup
1 parent 2b077c9 commit 1a51b36

File tree: 4 files changed (+76, -86 lines)


convert-hf-to-gguf.py

Lines changed: 72 additions & 74 deletions
@@ -2768,80 +2768,6 @@ def write_tensors(self):
         if len(experts) > 0:
             raise ValueError(f"Unprocessed experts: {experts}")
 
-@Model.register("JAISLMHeadModel")
-class JaisModel(Model):
-    model_arch = gguf.MODEL_ARCH.JAIS
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        # SwigLU activation
-        assert self.hparams["activation_function"] == "swiglu"
-        # ALiBi position embedding
-        assert self.hparams["position_embedding_type"] == "alibi"
-
-        # Embeddings scale
-        self.embeddings_scale = 1.0
-        # note: For some JAIS flavors, output is tied to (same as) wte in original model
-        self.output_is_wte = False
-        if 'mup_embeddings_scale' in self.hparams:
-            self.output_is_wte = True  # Hack (?)
-            self.embeddings_scale = self.hparams['mup_embeddings_scale']
-        elif 'embeddings_scale' in self.hparams:
-            self.embeddings_scale = self.hparams['embeddings_scale']
-        else:
-            assert False
-
-        self.width_scale = 1.0
-        if 'mup_output_alpha' in self.hparams:
-            assert 'mup_width_scale' in self.hparams
-            self.width_scale = self.hparams['mup_output_alpha'] * self.hparams['mup_width_scale']
-        elif 'width_scale' in self.hparams:
-            self.width_scale = self.hparams['width_scale']
-        else:
-            assert False
-
-    def set_vocab(self):
-        self._set_vocab_gpt2()
-
-    def set_gguf_parameters(self):
-        self.gguf_writer.add_name(self.dir_model.name)
-        self.gguf_writer.add_block_count(self.hparams["n_layer"])
-        self.gguf_writer.add_context_length(self.hparams["n_positions"])
-        self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
-        self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
-        self.gguf_writer.add_head_count(self.hparams["n_head"])
-        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
-        self.gguf_writer.add_file_type(self.ftype)
-
-    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        del bid  # unused
-
-        tensors: list[tuple[str, Tensor]] = []
-
-        # we don't need these
-        if name.endswith((".attn.bias", "relative_pe.slopes")):
-            return tensors
-
-        if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
-            data_torch = data_torch.transpose(1, 0)
-
-        new_name = self.map_tensor_name(name)
-
-        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
-            tensors.append((new_name, data_torch * self.embeddings_scale))
-            if self.output_is_wte:
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
-        elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
-            assert not self.output_is_wte
-            tensors.append((new_name, data_torch * self.width_scale))
-        else:
-            tensors.append((new_name, data_torch))
-
-        return tensors
-
-
-
 @Model.register("T5ForConditionalGeneration")
 @Model.register("T5WithLMHeadModel")
 class T5Model(Model):
@@ -2959,6 +2885,78 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         return [(self.map_tensor_name(name), data_torch)]
 
+@Model.register("JAISLMHeadModel")
+class JaisModel(Model):
+    model_arch = gguf.MODEL_ARCH.JAIS
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # SwigLU activation
+        assert self.hparams["activation_function"] == "swiglu"
+        # ALiBi position embedding
+        assert self.hparams["position_embedding_type"] == "alibi"
+
+        # Embeddings scale
+        self.embeddings_scale = 1.0
+        # note: For some JAIS flavors, output is tied to (same as) wte in original model
+        self.output_is_wte = False
+        if 'mup_embeddings_scale' in self.hparams:
+            self.output_is_wte = True  # Hack (?)
+            self.embeddings_scale = self.hparams['mup_embeddings_scale']
+        elif 'embeddings_scale' in self.hparams:
+            self.embeddings_scale = self.hparams['embeddings_scale']
+        else:
+            assert False
+
+        self.width_scale = 1.0
+        if 'mup_output_alpha' in self.hparams:
+            assert 'mup_width_scale' in self.hparams
+            self.width_scale = self.hparams['mup_output_alpha'] * self.hparams['mup_width_scale']
+        elif 'width_scale' in self.hparams:
+            self.width_scale = self.hparams['width_scale']
+        else:
+            assert False
+
+    def set_vocab(self):
+        self._set_vocab_gpt2()
+
+    def set_gguf_parameters(self):
+        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_block_count(self.hparams["n_layer"])
+        self.gguf_writer.add_context_length(self.hparams["n_positions"])
+        self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
+        self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
+        self.gguf_writer.add_head_count(self.hparams["n_head"])
+        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+        self.gguf_writer.add_file_type(self.ftype)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        tensors: list[tuple[str, Tensor]] = []
+
+        # we don't need these
+        if name.endswith((".attn.bias", "relative_pe.slopes")):
+            return tensors
+
+        if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
+            data_torch = data_torch.transpose(1, 0)
+
+        new_name = self.map_tensor_name(name)
+
+        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
+            tensors.append((new_name, data_torch * self.embeddings_scale))
+            if self.output_is_wte:
+                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
+        elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
+            assert not self.output_is_wte
+            tensors.append((new_name, data_torch * self.width_scale))
+        else:
+            tensors.append((new_name, data_torch))
+
+        return tensors
+
 
 ###### CONVERSION LOGIC ######
 
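The relocated JaisModel class derives two scaling factors from whichever hyperparameters the checkpoint exposes: an embeddings scale applied to the token-embedding tensor and a width scale applied to the output head. A minimal standalone sketch of that selection logic is shown below; it assumes an hparams dict shaped like the model's config.json, and the example values are placeholders rather than numbers taken from any real JAIS checkpoint.

    # Illustrative distillation of the scale selection in JaisModel.__init__
    # (a sketch for reading the diff, not the converter itself).
    def jais_scales(hparams: dict) -> tuple[float, float, bool]:
        # muP-style configs tie the output head to the token embeddings (wte).
        output_is_wte = 'mup_embeddings_scale' in hparams
        if output_is_wte:
            embeddings_scale = hparams['mup_embeddings_scale']
        elif 'embeddings_scale' in hparams:
            embeddings_scale = hparams['embeddings_scale']
        else:
            raise KeyError("expected mup_embeddings_scale or embeddings_scale")

        if 'mup_output_alpha' in hparams and 'mup_width_scale' in hparams:
            width_scale = hparams['mup_output_alpha'] * hparams['mup_width_scale']
        elif 'width_scale' in hparams:
            width_scale = hparams['width_scale']
        else:
            raise KeyError("expected mup_output_alpha/mup_width_scale or width_scale")

        return embeddings_scale, width_scale, output_is_wte

    # Placeholder values, for illustration only:
    print(jais_scales({'mup_embeddings_scale': 14.6, 'mup_output_alpha': 2.22, 'mup_width_scale': 0.03125}))

In the converter these factors are then multiplied into the TOKEN_EMBD and OUTPUT tensors inside modify_tensors, as the diff above shows.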
examples/main/main.cpp

Lines changed: 1 addition & 0 deletions
@@ -733,6 +733,7 @@ int main(int argc, char ** argv) {
 
             // Console/Stream Output
             fprintf(stdout, "%s", token_str.c_str());
+
             // Record Displayed Tokens To Log
             // Note: Generated tokens are created one by one hence this check
            if (embd.size() > 1) {

ggml/src/ggml.c

Lines changed: 1 addition & 1 deletion
@@ -13516,13 +13516,13 @@ static void ggml_compute_forward_soft_max_f32(
             } else {
                 for (int i = 0; i < nc; ++i) {
                     wp[i] += slope*mp_f32[i];
-
                 }
             }
         }
 
 #ifndef NDEBUG
         for (int i = 0; i < nc; ++i) {
+            //printf("p[%d] = %f\n", i, p[i]);
             assert(!isnan(wp[i]));
         }
 #endif
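For context, the loop this hunk touches is where the soft-max working buffer picks up an ALiBi-style bias: each mask value is scaled by a per-head slope and added to the logits (wp[i] += slope*mp_f32[i]) before normalization, which is how ALiBi position handling (used by JAIS, per the converter assertion above) enters the attention soft-max. Below is a rough Python sketch of that math only; it is an illustration with hypothetical inputs, not the ggml implementation.

    import math

    # Sketch: bias logits with slope-scaled mask values, then take a
    # numerically stable soft-max (mirrors wp[i] += slope*mp_f32[i]).
    def softmax_with_slope_bias(logits, mask, slope):
        wp = [x + slope * m for x, m in zip(logits, mask)]
        w_max = max(wp)                      # subtract the max for stability
        exps = [math.exp(w - w_max) for w in wp]
        total = sum(exps)
        return [e / total for e in exps]

    # Hypothetical inputs: three logits, an ALiBi-style distance mask, slope 0.5
    print(softmax_with_slope_bias([0.1, 0.2, 0.3], [0.0, -1.0, -2.0], 0.5))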

src/llama.cpp

Lines changed: 2 additions & 11 deletions
@@ -6899,6 +6899,8 @@ static bool llm_load_tensors(
                 } break;
             case LLM_ARCH_JAIS:
                 {
+                    model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
+
                     // Output
                     {
                         model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
@@ -6931,7 +6933,6 @@
 
                         layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
                         layer.ffn_up_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff});
-
                     }
                 } break;
             default:
@@ -12187,23 +12188,13 @@
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
 
         struct ggml_tensor * cur;
-        //struct ggml_tensor * pos;
         struct ggml_tensor * inpL;
 
         inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
 
-        // // inp_pos - contains the positions
-        // struct ggml_tensor * inp_pos = build_inp_pos();
-
         // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
         struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
 
-        // pos = ggml_get_rows(ctx0, model.pos_embd, inp_pos);
-        // cb(pos, "pos_embd", -1);
-
-        // inpL = ggml_add(ctx0, inpL, pos);
-        // cb(inpL, "inpL", -1);
-
         for (int il = 0; il < n_layer; ++il) {
             cur = llm_build_norm(ctx0, inpL, hparams,
                     model.layers[il].attn_norm,