
Commit ad66a8f

convert: text-only support for GLM-4.1V-9B-Thinking (#14495)
* use language_model part only
* set missing head_dim
1 parent 38d3af1 commit ad66a8f
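
The layout this relies on can be checked directly on the checkpoint: the diffs below key on the "model.language_model." prefix for the text stack and "model.visual." for the vision tower. A minimal sketch for inspecting a downloaded shard, assuming the usual HF sharded-safetensors layout (the shard filename below is a placeholder):

    # sketch: classify each weight as vision-tower or text-stack by its name prefix;
    # the shard filename is a placeholder, not the real file name
    from safetensors import safe_open

    with safe_open("model-00001-of-00004.safetensors", framework="pt") as f:
        for name in f.keys():
            part = "vision" if name.startswith("model.visual.") else "text"
            print(f"{part:6s} {name}")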

File tree

2 files changed: +29 -3 lines


convert_hf_to_gguf.py

Lines changed: 14 additions & 0 deletions
@@ -6517,6 +6517,20 @@ def set_gguf_parameters(self):
             self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
 
 
+@ModelBase.register("Glm4vForConditionalGeneration")
+class Glm4vThinkingModel(Glm4Model):
+    model_arch = gguf.MODEL_ARCH.GLM4
+
+    def set_gguf_parameters(self):
+        self.hparams["head_dim"] = 128
+        super().set_gguf_parameters()
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name.startswith("model.visual."):
+            return []
+        return super().modify_tensors(data_torch, name, bid)
+
+
 @ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
 class ChatGLMModel(TextModel):
     model_arch = gguf.MODEL_ARCH.CHATGLM
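
The new modify_tensors() override amounts to a prefix filter: anything under "model.visual." is dropped, and the remaining language_model tensors flow through the existing GLM4 conversion path. A standalone sketch of that rule, using a few illustrative tensor names rather than real checkpoint data:

    # sketch of the text-only filtering rule; the tensor names below are illustrative
    def keep_for_text_only(name: str) -> bool:
        # vision-tower weights are skipped entirely
        return not name.startswith("model.visual.")

    names = [
        "model.visual.patch_embed.proj.weight",                   # dropped
        "model.language_model.embed_tokens.weight",               # kept
        "model.language_model.layers.0.self_attn.q_proj.weight",  # kept
    ]
    print([n for n in names if keep_for_text_only(n)])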

gguf-py/gguf/tensor_mapping.py

Lines changed: 15 additions & 3 deletions
@@ -14,6 +14,7 @@ class TensorNameMap:
             "transformer.word_embeddings", # falcon
             "word_embeddings", # bloom
             "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 plamo2 granite-hybrid
+            "model.language_model.embed_tokens", # glm-4-thinking
             "tok_embeddings", # llama-pth
             "embeddings.word_embeddings", # bert nomic-bert
             "language_model.embedding.word_embeddings", # persimmon
@@ -94,6 +95,7 @@ class TensorNameMap:
             "model.ln_out", # rwkv7
             "backbone.final_layer_norm", # wavtokenizer
             "model.norm", # llama4
+            "model.language_model.norm" # glm-4-thinking
         ),
 
         # Rope frequencies
@@ -139,6 +141,7 @@ class TensorNameMap:
             "model.layers.{bid}.input_layernorm", # llama4
             "transformer_encoder.{bid}.attention_norm", # neobert
             "model.layers.{bid}.operator_norm", # lfm2
+            "model.language_model.layers.{bid}.input_layernorm", # glm-4-thinking
         ),
 
         # Attention norm 2
@@ -183,6 +186,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
             "transformer.h.{bid}.attn.attention.q_proj", # exaone
             "model.layers.{bid}.self_attn.q_proj", # llama4
+            "model.language_model.layers.{bid}.self_attn.q_proj", # glm-4-thinking
         ),
 
         # Attention key
@@ -199,6 +203,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
             "transformer.h.{bid}.attn.attention.k_proj", # exaone
             "model.layers.{bid}.self_attn.k_proj", # llama4
+            "model.language_model.layers.{bid}.self_attn.k_proj", # glm-4-thinking
         ),
 
         # Attention value
@@ -214,6 +219,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.multi_head_attention.value", # Grok
             "transformer.h.{bid}.attn.attention.v_proj", # exaone
             "model.layers.{bid}.self_attn.v_proj", # llama4
+            "model.language_model.layers.{bid}.self_attn.v_proj", # glm-4-thinking
         ),
 
         # Attention output
@@ -246,6 +252,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.attention.out_proj", # exaone
             "model.layers.{bid}.self_attn.o_proj", # llama4
             "transformer_encoder.{bid}.wo", # neobert
+            "model.language_model.layers.{bid}.self_attn.o_proj", # glm-4-thinking
         ),
 
         # Attention output norm
@@ -258,9 +265,10 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.ATTN_POST_NORM: (
-            "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2 # ge
-            "model.layers.{bid}.post_self_attn_layernorm", # glm-4-0414
-            "model.layers.layers.{bid}.post_mixer_norm.weight", # plamo2
+            "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2 # ge
+            "model.layers.{bid}.post_self_attn_layernorm", # glm-4-0414
+            "model.layers.layers.{bid}.post_mixer_norm.weight", # plamo2
+            "model.language_model.layers.{bid}.post_self_attn_layernorm", # glm-4-thinking
         ),
 
         # Rotary embeddings
@@ -291,6 +299,7 @@ class TensorNameMap:
             "model.layers.{bid}.post_attention_layernorm", # llama4
             "transformer_encoder.{bid}.ffn_norm", # neobert
             "model.layers.layers.{bid}.pre_mlp_norm", # plamo2
+            "model.language_model.layers.{bid}.post_attention_layernorm" # glm-4-thinking
         ),
 
         # Post feed-forward norm
@@ -305,6 +314,7 @@ class TensorNameMap:
             "model.layers.{bid}.post_mlp_layernorm", # glm-4-0414
             "model.layers.layers.{bid}.post_mlp_norm.weight", # plamo2
             "model.layers.{bid}.feed_forward.up_proj",
+            "model.language_model.layers.{bid}.post_mlp_layernorm", # glm-4-thinking
         ),
 
         MODEL_TENSOR.FFN_GATE_INP: (
@@ -362,6 +372,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.c_fc_1", # exaone
             "model.layers.{bid}.feed_forward.up_proj", # llama4 jamba granite-hybrid
             "transformer_encoder.{bid}.ffn.w12", # neobert
+            "model.language_model.layers.{bid}.mlp.gate_up_proj", # glm-4-thinking
         ),
 
         MODEL_TENSOR.FFN_UP_EXP: (
@@ -448,6 +459,7 @@ class TensorNameMap:
             "model.layers.h.{bid}.mlp.c_proj", # exaone
             "model.layers.{bid}.feed_forward.down_proj", # llama4 jamba granite-hybrid
             "transformer_encoder.{bid}.ffn.w3", # neobert
+            "model.language_model.layers.{bid}.mlp.down_proj", # glm-4-thinking
         ),
 
         MODEL_TENSOR.FFN_DOWN_EXP: (
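
These tensor_mapping.py entries are what let the converter translate "model.language_model.*" names into the GGUF tensor names already used by the GLM4 graph. A minimal sketch of that lookup, assuming the gguf-py helpers the converter relies on (gguf.get_tensor_name_map and TensorNameMap.get_name); the block count passed in is illustrative:

    import gguf

    # build the name map for the GLM4 text architecture (block count is illustrative)
    tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.GLM4, 40)

    for hf_name in (
        "model.language_model.embed_tokens.weight",
        "model.language_model.layers.0.self_attn.q_proj.weight",
        "model.language_model.layers.0.mlp.down_proj.weight",
    ):
        # prints the mapped GGUF tensor name, or None if the name is unknown
        print(hf_name, "->", tmap.get_name(hf_name, try_suffixes=(".weight", ".bias")))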
