Commit 4ef14f0

CarryFun and Fanruikai authored and committed
llama : support MiniCPM3 (ggml-org#9322)
Co-authored-by: 范睿凯 <[email protected]>
1 parent 3e858d4 commit 4ef14f0

3 files changed: +371, -0 lines changed

convert_hf_to_gguf.py

Lines changed: 54 additions & 0 deletions
@@ -1841,6 +1841,60 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]
 
 
+@Model.register("MiniCPM3ForCausalLM")
+class MiniCPM3Model(Model):
+    model_arch = gguf.MODEL_ARCH.MINICPM3
+
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+
+        rope_dims = hparams["qk_rope_head_dim"]
+
+        self.gguf_writer.add_file_type(self.ftype)
+        self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
+        self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+        self.gguf_writer.add_block_count(self.block_count)
+        self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+        self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count_kv(hparams["num_key_value_heads"])
+        self.gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None:
+            self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"])
+        self.gguf_writer.add_kv_lora_rank(hparams["kv_lora_rank"])
+        self.gguf_writer.add_key_length(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
+        self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
+
+        rope_scaling = self.find_hparam(['rope_scaling'], True)
+        if rope_scaling is None:
+            return
+
+        long_factors = rope_scaling.get('long_factor', None)
+        short_factors = rope_scaling.get('short_factor', None)
+
+        if long_factors is None or short_factors is None:
+            raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling_short_factor')
+
+        if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
+            raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
+
+        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
+        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
+
+    def set_vocab(self):
+        self._set_vocab_llama_hf()
+
+    def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
+        if n_kv_head is not None and n_head != n_kv_head:
+            n_head //= n_kv_head
+
+        return (
+            weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
+        )
+
+
 @Model.register("QWenLMHeadModel")
 class QwenModel(Model):
     model_arch = gguf.MODEL_ARCH.QWEN
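A note on the checks above: RoPE rotates the rope dimensions in pairs, one scaling factor per pair, which is why long_factor and short_factor must each hold exactly rope_dims / 2 entries (a qk_rope_head_dim of 32 would need 16 factors per list; 32 is an illustrative figure here, not a value read from the checkpoint). The _reverse_hf_permute helper applies the inverse of the per-head Q/K row permutation used for llama-style Hugging Face checkpoints, leaving the tensor shape unchanged. A minimal standalone sketch of that reshape/swapaxes round trip, with made-up shapes (the function and shapes below are illustrative, not part of the commit):

import torch

def reverse_hf_permute(weights: torch.Tensor, n_head: int) -> torch.Tensor:
    # Same reshape/swapaxes logic as MiniCPM3Model._reverse_hf_permute:
    # split the rows into (head, 2, head_dim // 2) groups, swap the two
    # middle axes, then flatten back to the original shape.
    return (
        weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
        .swapaxes(1, 2)
        .reshape(weights.shape)
    )

# Illustrative shapes only: 4 heads with head_dim 8 give a (32, 32) matrix.
w = torch.arange(32 * 32, dtype=torch.float32).reshape(32, 32)
out = reverse_hf_permute(w, n_head=4)
assert out.shape == w.shape  # rows are regrouped within each head; shape is preserved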

gguf-py/gguf/constants.py

Lines changed: 19 additions & 0 deletions
@@ -210,6 +210,7 @@ class MODEL_ARCH(IntEnum):
     ORION = auto()
     INTERNLM2 = auto()
     MINICPM = auto()
+    MINICPM3 = auto()
     GEMMA = auto()
     GEMMA2 = auto()
     STARCODER2 = auto()

@@ -364,6 +365,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.ORION: "orion",
     MODEL_ARCH.INTERNLM2: "internlm2",
     MODEL_ARCH.MINICPM: "minicpm",
+    MODEL_ARCH.MINICPM3: "minicpm3",
     MODEL_ARCH.GEMMA: "gemma",
     MODEL_ARCH.GEMMA2: "gemma2",
     MODEL_ARCH.STARCODER2: "starcoder2",

@@ -867,6 +869,23 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.MINICPM3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q_A,
+        MODEL_TENSOR.ATTN_Q_B,
+        MODEL_TENSOR.ATTN_KV_A_MQA,
+        MODEL_TENSOR.ATTN_KV_B,
+        MODEL_TENSOR.ATTN_Q_A_NORM,
+        MODEL_TENSOR.ATTN_KV_A_NORM,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.GEMMA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
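With the enum value, the architecture name, and the per-architecture tensor list all registered, MiniCPM3 resolves through gguf-py's existing lookup tables; nothing else in the module needs to change. A small sketch of the lookups these three entries enable (assuming a gguf-py that includes this commit; TENSOR_NAMES maps each MODEL_TENSOR to a name template):

import gguf

arch = gguf.MODEL_ARCH.MINICPM3
print(gguf.MODEL_ARCH_NAMES[arch])  # "minicpm3", the name written into the GGUF header
for t in gguf.MODEL_TENSORS[arch]:
    # prints templates such as "token_embd" or "blk.{bid}.attn_kv_a_mqa"
    print(gguf.TENSOR_NAMES[t])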
