@@ -1819,6 +1819,60 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]


+@Model.register("MiniCPM3ForCausalLM")
+class MiniCPM3Model(Model):
+    model_arch = gguf.MODEL_ARCH.MINICPM3
+
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+
+        rope_dims = hparams["qk_rope_head_dim"]
+
+        self.gguf_writer.add_file_type(self.ftype)
+        self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
+        self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+        self.gguf_writer.add_block_count(self.block_count)
+        self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+        self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count_kv(hparams["num_key_value_heads"])
+        self.gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None:
+            self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"])
+        self.gguf_writer.add_kv_lora_rank(hparams["kv_lora_rank"])
+        self.gguf_writer.add_key_length(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
+        self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
+
+        rope_scaling = self.find_hparam(['rope_scaling'], True)
+        if rope_scaling is None:
+            return
+
+        long_factors = rope_scaling.get('long_factor', None)
+        short_factors = rope_scaling.get('short_factor', None)
+
+        if long_factors is None or short_factors is None:
+            raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling_short_factor')
+
+        if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
+            raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
+
+        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG]  + ".weight", np.array(long_factors, dtype=np.float32))
+        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
+
+    def set_vocab(self):
+        self._set_vocab_llama_hf()
+
+    def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
+        if n_kv_head is not None and n_head != n_kv_head:
+            n_head //= n_kv_head
+
+        return (
+            weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
+        )
+
+
 @Model.register("QWenLMHeadModel")
 class QwenModel(Model):
     model_arch = gguf.MODEL_ARCH.QWEN
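
A note on the _reverse_hf_permute helper in the hunk above: it applies a reshape/swapaxes pass over the head dimension, the pattern used elsewhere in convert_hf_to_gguf.py to undo the per-head permutation that Hugging Face-format checkpoints apply to rotary attention weights. A minimal sketch of that inverse relationship on a toy tensor follows; the hf_permute helper and the toy shapes here are illustrative assumptions, not part of this change.

import torch

def hf_permute(w: torch.Tensor, n_head: int) -> torch.Tensor:
    # Forward permutation: group each head's rows as (half, 2) pairs.
    return (
        w.reshape(n_head, w.shape[0] // n_head // 2, 2, *w.shape[1:])
        .swapaxes(1, 2)
        .reshape(w.shape)
    )

def reverse_hf_permute(w: torch.Tensor, n_head: int) -> torch.Tensor:
    # Same transform as MiniCPM3Model._reverse_hf_permute above
    # (minus the n_kv_head handling): group rows as (2, half) and swap back.
    return (
        w.reshape(n_head, 2, w.shape[0] // n_head // 2, *w.shape[1:])
        .swapaxes(1, 2)
        .reshape(w.shape)
    )

w = torch.arange(2 * 8 * 4, dtype=torch.float32).reshape(2 * 8, 4)  # toy: 2 heads, 8 rows per head
assert torch.equal(reverse_hf_permute(hf_permute(w, n_head=2), n_head=2), w)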