Commit f83e9c9 (parent: bdf314f)

Support MiniCPM3.

File tree: 3 files changed (+371, -0 lines)

convert_hf_to_gguf.py

Lines changed: 54 additions & 0 deletions
@@ -1819,6 +1819,60 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]
 
 
+@Model.register("MiniCPM3ForCausalLM")
+class MiniCPM3Model(Model):
+    model_arch = gguf.MODEL_ARCH.MINICPM3
+
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+
+        rope_dims = hparams["qk_rope_head_dim"]
+
+        self.gguf_writer.add_file_type(self.ftype)
+        self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
+        self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+        self.gguf_writer.add_block_count(self.block_count)
+        self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+        self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count_kv(hparams["num_key_value_heads"])
+        self.gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None:
+            self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"])
+        self.gguf_writer.add_kv_lora_rank(hparams["kv_lora_rank"])
+        self.gguf_writer.add_key_length(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
+        self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
+
+        rope_scaling = self.find_hparam(['rope_scaling'], True)
+        if rope_scaling is None:
+            return
+
+        long_factors = rope_scaling.get('long_factor', None)
+        short_factors = rope_scaling.get('short_factor', None)
+
+        if long_factors is None or short_factors is None:
+            raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling.short_factor')
+
+        if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
+            raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
+
+        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
+        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
+
+    def set_vocab(self):
+        self._set_vocab_llama_hf()
+
+    def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
+        if n_kv_head is not None and n_head != n_kv_head:
+            n_head //= n_kv_head
+
+        return (
+            weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
+        )
+
+
 @Model.register("QWenLMHeadModel")
 class QwenModel(Model):
     model_arch = gguf.MODEL_ARCH.QWEN
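
The _reverse_hf_permute helper added above is the reshape/swapaxes trick llama.cpp converters use to reorder the rows of RoPE projection weights: per attention head, the rows of the first half and the rows of the second half are interleaved. A minimal sketch of its effect on a toy array, with numpy standing in for torch purely for illustration (the converter itself operates on torch Tensors):

import numpy as np

def reverse_hf_permute(weights: np.ndarray, n_head: int) -> np.ndarray:
    # Split the leading dim into (head, 2 halves, rows_per_half), swap the
    # middle axes, and flatten back: within each head, row i of the first
    # half and row i of the second half end up adjacent.
    return (
        weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
        .swapaxes(1, 2)
        .reshape(weights.shape)
    )

rows = np.arange(8)                        # 2 heads, 4 rows per head
print(reverse_hf_permute(rows, n_head=2))  # -> [0 2 1 3 4 6 5 7]

The committed version additionally divides n_head by n_kv_head when the two differ, which changes the block size the permutation operates on.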

gguf-py/gguf/constants.py

Lines changed: 19 additions & 0 deletions
@@ -210,6 +210,7 @@ class MODEL_ARCH(IntEnum):
     ORION        = auto()
     INTERNLM2    = auto()
     MINICPM      = auto()
+    MINICPM3     = auto()
     GEMMA        = auto()
     GEMMA2       = auto()
     STARCODER2   = auto()
@@ -364,6 +365,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.ORION:      "orion",
     MODEL_ARCH.INTERNLM2:  "internlm2",
     MODEL_ARCH.MINICPM:    "minicpm",
+    MODEL_ARCH.MINICPM3:   "minicpm3",
     MODEL_ARCH.GEMMA:      "gemma",
     MODEL_ARCH.GEMMA2:     "gemma2",
     MODEL_ARCH.STARCODER2: "starcoder2",
@@ -867,6 +869,23 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.MINICPM3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q_A,
+        MODEL_TENSOR.ATTN_Q_B,
+        MODEL_TENSOR.ATTN_KV_A_MQA,
+        MODEL_TENSOR.ATTN_KV_B,
+        MODEL_TENSOR.ATTN_Q_A_NORM,
+        MODEL_TENSOR.ATTN_KV_A_NORM,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.GEMMA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
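
These three hunks follow gguf-py's usual registration pattern: an enum member, a string name that is written into the GGUF header, and a per-architecture list of permitted tensor kinds. A minimal self-contained sketch of the lookup, with the enum reduced to two members for brevity (the real tables live in gguf-py/gguf/constants.py):

from enum import IntEnum, auto

class MODEL_ARCH(IntEnum):
    MINICPM  = auto()
    MINICPM3 = auto()

MODEL_ARCH_NAMES = {
    MODEL_ARCH.MINICPM:  "minicpm",
    MODEL_ARCH.MINICPM3: "minicpm3",
}

# The converter writes this string into the GGUF metadata as the
# architecture key; llama.cpp reads it back to select the matching
# compute graph at load time.
print(MODEL_ARCH_NAMES[MODEL_ARCH.MINICPM3])  # -> "minicpm3"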
