@@ -523,15 +523,15 @@ def set_gguf_parameters(self):
523
523
self .gguf_writer .add_context_length (n_ctx )
524
524
logger .info (f"gguf: context length = { n_ctx } " )
525
525
526
- if (n_embd := self .find_hparam (["hidden_size" , "n_embd" ], optional = True )) is not None :
526
+ if (n_embd := self .find_hparam (["hidden_size" , "n_embd" , "dim" ], optional = True )) is not None :
527
527
self .gguf_writer .add_embedding_length (n_embd )
528
528
logger .info (f"gguf: embedding length = { n_embd } " )
529
529
530
- if (n_ff := self .find_hparam (["intermediate_size" , "n_inner" ], optional = True )) is not None :
530
+ if (n_ff := self .find_hparam (["intermediate_size" , "n_inner" , "hidden_dim" ], optional = True )) is not None :
531
531
self .gguf_writer .add_feed_forward_length (n_ff )
532
532
logger .info (f"gguf: feed forward length = { n_ff } " )
533
533
534
- if (n_head := self .find_hparam (["num_attention_heads" , "n_head" ], optional = True )) is not None :
534
+ if (n_head := self .find_hparam (["num_attention_heads" , "n_head" , "n_heads" ], optional = True )) is not None :
535
535
self .gguf_writer .add_head_count (n_head )
536
536
logger .info (f"gguf: head count = { n_head } " )
537
537
@@ -674,12 +674,12 @@ def get_vocab_base_pre(self, tokenizer) -> str:
674
674
if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed" :
675
675
# ref: https://huggingface.co/tiiuae/falcon-7b
676
676
res = "falcon"
677
- if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e" :
678
- # ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
679
- res = "falcon3"
680
677
if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f" :
681
678
# ref: https://huggingface.co/BAAI/bge-small-en-v1.5
682
679
res = "bert-bge"
680
+ if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e" :
681
+ # ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
682
+ res = "falcon3"
683
683
if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7" :
684
684
# ref: https://huggingface.co/BAAI/bge-large-zh-v1.5
685
685
res = "bert-bge-large"
@@ -731,9 +731,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
731
731
if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a" :
732
732
# ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
733
733
res = "jina-v2-code"
734
- if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b" or chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516" :
735
- # ref: https://huggingface.co/THUDM/glm-4-9b-chat
736
- res = "chatglm-bpe"
737
734
if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee" :
738
735
# ref: https://huggingface.co/LumiOpen/Viking-7B
739
736
res = "viking"
@@ -764,9 +761,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
764
761
if chkhsh == "60824e3c0d9401f89943cbb2fff727f0e2d4c545ba4df2d6e4f09a6db0f5b450" :
765
762
# ref: https://huggingface.co/facebook/chameleon-7b
766
763
res = "chameleon"
767
- if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35" :
768
- # ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
769
- res = "minerva-7b"
770
764
if chkhsh == "8b5a93ed704057481f240da0be7e7dca721d7f8f4755263b6807227a2cbeae65" :
771
765
# ref: https://huggingface.co/sentence-transformers/stsb-roberta-base
772
766
res = "roberta-bpe"
@@ -797,15 +791,24 @@ def get_vocab_base_pre(self, tokenizer) -> str:
797
791
if chkhsh == "d353350c764d8c3b39c763113960e4fb4919bea5fbf208a0e3b22e8469dc7406" :
798
792
# ref: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
799
793
res = "llama4"
800
- if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2" :
801
- # ref: https://huggingface.co/THUDM/glm-4-9b-hf
802
- res = "glm4"
803
794
if chkhsh == "0e9433cbbb161f89e264eb32e8e64bfe69e834973ffca5d41d3948a604a3e2a3" :
804
795
# ref: https://huggingface.co/mistral-community/pixtral-12b
805
796
res = "pixtral"
806
797
if chkhsh == "d5f1dd6f980fec569fb218a81a7658ac45fc56b38c5a0adeb1c232fbe04ef5ec" :
807
798
# ref: https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base
808
799
res = "seed-coder"
800
+ if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b" :
801
+ # ref: https://huggingface.co/THUDM/glm-4-9b-chat
802
+ res = "chatglm-bpe"
803
+ if chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516" :
804
+ # ref: https://huggingface.co/THUDM/glm-4-9b-chat
805
+ res = "chatglm-bpe"
806
+ if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2" :
807
+ # ref: https://huggingface.co/THUDM/glm-4-9b-hf
808
+ res = "glm4"
809
+ if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35" :
810
+ # ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
811
+ res = "minerva-7b"
809
812
810
813
if res is None :
811
814
logger .warning ("\n " )
@@ -1044,6 +1047,10 @@ def _set_vocab_rwkv_world(self):
1044
1047
special_vocab .chat_template = "rwkv-world"
1045
1048
# hack: Add '\n\n' as the EOT token to make it chat normally
1046
1049
special_vocab ._set_special_token ("eot" , 261 )
1050
+ # hack: Override these as they have already been set (incorrectly)
1051
+ special_vocab .special_token_ids ["bos" ] = 0
1052
+ special_vocab .special_token_ids ["eos" ] = 0
1053
+
1047
1054
special_vocab .add_to_gguf (self .gguf_writer )
1048
1055
1049
1056
def _set_vocab_builtin (self , model_name : Literal ["gpt-neox" , "llama-spm" ], vocab_size : int ):
@@ -3907,6 +3914,26 @@ def _xlmroberta_set_vocab(self) -> None:
3907
3914
self .gguf_writer .add_add_eos_token (True )
3908
3915
3909
3916
3917
@ModelBase.register("DistilBertModel", "DistilBertForMaskedLM", "DistilBertForSequenceClassification")
class DistilBertModel(BertModel):
    """Converter for DistilBERT checkpoints, reusing the BERT conversion path.

    DistilBERT shares the BERT tensor layout, so this subclass only strips the
    `distilbert.` tensor-name prefix, drops the MLM head tensors, and pins the
    layer-norm epsilon before delegating to BertModel.
    """

    # DistilBERT maps onto the existing BERT architecture in GGUF.
    model_arch = gguf.MODEL_ARCH.BERT

    def set_gguf_parameters(self):
        # NOTE(review): 1e-12 is hard-coded rather than read from hparams —
        # presumably the HF config's layer_norm_eps is always 1e-12 for
        # DistilBERT; confirm if other fine-tunes differ.
        self.gguf_writer.add_layer_norm_eps(1e-12)
        logger.info("gguf: layer norm epsilon = 1e-12")
        super().set_gguf_parameters()

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        # Strip the leading "distilbert." (11 chars) so names match BERT's mapping.
        if name.startswith("distilbert."):
            name = name[11:]

        # These layers act as MLM head, so we don't need them
        if name.startswith("vocab_"):
            return []

        return super().modify_tensors(data_torch, name, bid)
3910
3937
@ModelBase .register ("RobertaModel" , "RobertaForSequenceClassification" )
3911
3938
class RobertaModel (BertModel ):
3912
3939
model_arch = gguf .MODEL_ARCH .BERT
0 commit comments