@@ -1841,6 +1841,60 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
        return [(self.map_tensor_name(name), data_torch)]
+@Model.register("MiniCPM3ForCausalLM")
+class MiniCPM3Model(Model):
+    model_arch = gguf.MODEL_ARCH.MINICPM3
+
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+
+        rope_dims = hparams["qk_rope_head_dim"]
+
+        self.gguf_writer.add_file_type(self.ftype)
+        self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
+        self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+        self.gguf_writer.add_block_count(self.block_count)
+        self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+        self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count_kv(hparams["num_key_value_heads"])
+        self.gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None:
+            self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"])
+        self.gguf_writer.add_kv_lora_rank(hparams["kv_lora_rank"])
+        self.gguf_writer.add_key_length(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
+        self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
+
+        rope_scaling = self.find_hparam(['rope_scaling'], True)
+        if rope_scaling is None:
+            return
+
+        long_factors = rope_scaling.get('long_factor', None)
+        short_factors = rope_scaling.get('short_factor', None)
+
+        if long_factors is None or short_factors is None:
+            raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling.short_factor')
+
+        if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
+            raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
+
+        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
+        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
+
+    def set_vocab(self):
+        self._set_vocab_llama_hf()
+
+    def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
+        if n_kv_head is not None and n_head != n_kv_head:
+            n_head //= n_kv_head
+
+        return (
+            weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
+        )
+
+
@Model.register("QWenLMHeadModel")
class QwenModel(Model):
    model_arch = gguf.MODEL_ARCH.QWEN
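Aside for reviewers (not part of the diff): a minimal standalone sketch of what the reshape/swapaxes in `_reverse_hf_permute` does to a toy tensor, assuming a single attention head and a 6-row weight slice. The free function `reverse_hf_permute` below simply mirrors the method body so the example runs on its own; the shapes and values are illustrative only.

from __future__ import annotations

import torch

# standalone copy of the permute used by MiniCPM3Model._reverse_hf_permute above,
# kept here only so the toy example below is runnable on its own
def reverse_hf_permute(weights: torch.Tensor, n_head: int, n_kv_head: int | None = None) -> torch.Tensor:
    if n_kv_head is not None and n_head != n_kv_head:
        n_head //= n_kv_head
    return (
        weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
        .swapaxes(1, 2)
        .reshape(weights.shape)
    )

# toy tensor: one head, 6 rows, 1 column
w = torch.arange(6.0).reshape(6, 1)
print(reverse_hf_permute(w, n_head=1).flatten().tolist())
# prints [0.0, 3.0, 1.0, 4.0, 2.0, 5.0]: rows from the first and second half
# of the head are interleaved pairwise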