@@ -1818,6 +1818,58 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         return [(self.map_tensor_name(name), data_torch)]
 
 
+@Model.register("MiniCPM3ForCausalLM")
+class MiniCPM3Model(Model):
+    model_arch = gguf.MODEL_ARCH.MINICPM3
+
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+
+        rope_dims = hparams["qk_rope_head_dim"]
+
+        self.gguf_writer.add_file_type(self.ftype)
+        self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
+        self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+        self.gguf_writer.add_block_count(self.block_count)
+        self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+        self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count_kv(hparams["num_key_value_heads"])
+        self.gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None:
+            self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"])
+        self.gguf_writer.add_kv_lora_rank(hparams["kv_lora_rank"])
+        self.gguf_writer.add_key_length(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
+        self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
+
+        rope_scaling = self.find_hparam(['rope_scaling'], True)
+        if rope_scaling is None:
+            return
+
+        long_factors = rope_scaling.get('long_factor', None)
+        short_factors = rope_scaling.get('short_factor', None)
+
+        if long_factors is None or short_factors is None:
+            raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling.short_factor')
+
+        if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
+            raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
+
+        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
+        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
+
+    def set_vocab(self):
+        self._set_vocab_llama_hf()
+
+    def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
+        if n_kv_head is not None and n_head != n_kv_head:
+            n_head //= n_kv_head
+
+        return (
+            weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
+        )
 
 
 @Model.register("QWenLMHeadModel")
 class QwenModel(Model):
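
For reference, a small standalone sketch (not part of the diff, with made-up toy sizes) of the reshape/swapaxes trick that _reverse_hf_permute applies: it groups the leading weight dimension into (n_head, 2, head_dim // 2), swaps the two inner axes, and flattens back, which undoes the interleaved rotary layout found in Hugging Face checkpoints.

# Assumed toy sizes for illustration only: 2 heads, head_dim 4, 8 columns.
import numpy as np

n_head, head_dim, n_cols = 2, 4, 8
weights = np.arange(n_head * head_dim * n_cols, dtype=np.float32).reshape(n_head * head_dim, n_cols)

# Same reshape -> swapaxes -> reshape sequence as in _reverse_hf_permute above.
restored = (
    weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
    .swapaxes(1, 2)
    .reshape(weights.shape)
)

print(restored.shape)  # (8, 8): shape is unchanged, rows are reordered within each head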