@@ -490,6 +490,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee":
             # ref: https://huggingface.co/LumiOpen/Viking-7B
             res = "viking"
+        if chkhsh == "b53802fb28e26d645c3a310b34bfe07da813026ec7c7716883404d5e0f8b1901":
+            # ref: https://huggingface.co/core42/jais-13b
+            res = "jais"
 
         if res is None:
             logger.warning("\n")
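For context, `get_vocab_base_pre()` identifies the pre-tokenizer by hashing the token IDs that the Hugging Face tokenizer produces for a fixed probe string, and the new branch maps the hash of the core42/jais-13b tokenizer to the `"jais"` pre-tokenizer name. A minimal sketch of how such a hash is derived (the probe text here is a placeholder rather than the exact string the converter uses, and loading the JAIS tokenizer may require `trust_remote_code`):

```python
from hashlib import sha256
from transformers import AutoTokenizer

# Placeholder probe text; the converter hashes the token IDs of its own fixed test string.
chktxt = "Hello, world! 123 éüß"

tokenizer = AutoTokenizer.from_pretrained("core42/jais-13b", trust_remote_code=True)
chktok = tokenizer.encode(chktxt)

# Two tokenizers that pre-tokenize identically produce the same hash here.
chkhsh = sha256(str(chktok).encode()).hexdigest()
print(chkhsh)
```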
@@ -576,7 +579,19 @@ def _set_vocab_qwen(self):
         special_vocab._set_special_token("unk", tokenizer.special_tokens["<|endoftext|>"])
         special_vocab.add_to_gguf(self.gguf_writer)
 
-    def _set_vocab_sentencepiece(self):
+    def _set_vocab_sentencepiece(self, add_to_gguf=True):
+        tokens, scores, toktypes = self._create_vocab_sentencepiece()
+
+        self.gguf_writer.add_tokenizer_model("llama")
+        self.gguf_writer.add_tokenizer_pre("default")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def _create_vocab_sentencepiece(self):
         from sentencepiece import SentencePieceProcessor
 
         tokenizer_path = self.dir_model / 'tokenizer.model'
@@ -638,14 +653,7 @@ def _set_vocab_sentencepiece(self):
                 scores.append(-1000.0)
                 toktypes.append(SentencePieceTokenTypes.UNUSED)
 
-        self.gguf_writer.add_tokenizer_model("llama")
-        self.gguf_writer.add_tokenizer_pre("default")
-        self.gguf_writer.add_token_list(tokens)
-        self.gguf_writer.add_token_scores(scores)
-        self.gguf_writer.add_token_types(toktypes)
-
-        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
-        special_vocab.add_to_gguf(self.gguf_writer)
+        return tokens, scores, toktypes
 
     def _set_vocab_llama_hf(self):
         vocab = gguf.LlamaHfVocab(self.dir_model)
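The two hunks above split vocab handling in two: `_create_vocab_sentencepiece()` now only builds the token list, scores, and types, while `_set_vocab_sentencepiece()` writes them to the GGUF file. This lets subclasses post-process the vocab before serializing it, which is exactly what the Gemma2 change further down does. A rough sketch of the pattern, assuming the script's existing imports and a hypothetical model class:

```python
class ExampleModel(Model):  # hypothetical subclass, for illustration only
    def set_vocab(self):
        # Build the raw SentencePiece vocab without writing anything yet.
        tokens, scores, toktypes = self._create_vocab_sentencepiece()

        # Post-process, e.g. force token 3 to be treated as a control token.
        toktypes[3] = SentencePieceTokenTypes.CONTROL

        # Serialize the (possibly modified) vocab, mirroring _set_vocab_sentencepiece().
        self.gguf_writer.add_tokenizer_model("llama")
        self.gguf_writer.add_tokenizer_pre("default")
        self.gguf_writer.add_token_list(tokens)
        self.gguf_writer.add_token_scores(scores)
        self.gguf_writer.add_token_types(toktypes)

        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
        special_vocab.add_to_gguf(self.gguf_writer)
```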
@@ -1979,7 +1987,7 @@ def set_gguf_parameters(self):
         if len(rope_scaling_type) == 0:
             raise KeyError('Missing the required key rope_scaling.type')
 
-        if rope_scaling_type == 'su':
+        if rope_scaling_type == 'su' or rope_scaling_type == 'longrope':
             attn_factor = math.sqrt(1 + math.log(scale) / math.log(orig_max_pos_embds)) if scale > 1.0 else 1.0
         elif rope_scaling_type == 'yarn':
             attn_factor = 0.1 * math.log(scale) + 1.0 if scale > 1.0 else 1.0
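Treating 'longrope' as an alias of 'su' reuses the same attention-factor formula: with `scale = max_pos_embds / orig_max_pos_embds`, attention is scaled by `sqrt(1 + ln(scale) / ln(orig_max_pos_embds))` once the context is extended. A quick numeric check under assumed Phi-3-style values (4k original context extended to 128k):

```python
import math

orig_max_pos_embds = 4096    # assumed original context length
max_pos_embds = 131072       # assumed extended context length
scale = max_pos_embds / orig_max_pos_embds  # 32.0

attn_factor = math.sqrt(1 + math.log(scale) / math.log(orig_max_pos_embds)) if scale > 1.0 else 1.0
print(round(attn_factor, 4))  # ~1.1902
```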
@@ -2353,6 +2361,8 @@ def set_vocab(self):
         special_vocab._set_special_token("eot", 107)
         special_vocab.add_to_gguf(self.gguf_writer)
 
+        self.gguf_writer.add_add_space_prefix(False)
+
     def set_gguf_parameters(self):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]
@@ -2390,7 +2400,20 @@ class Gemma2Model(Model):
     model_arch = gguf.MODEL_ARCH.GEMMA2
 
     def set_vocab(self):
-        self._set_vocab_llama_hf()
+        tokens, scores, toktypes = self._create_vocab_sentencepiece()
+        # hack: This is required so that we can properly use start/end-of-turn for chat template
+        for i in range(108):
+            # including <unusedX>, <start_of_turn>, <end_of_turn>
+            toktypes[i] = SentencePieceTokenTypes.CONTROL
+        self.gguf_writer.add_tokenizer_model("llama")
+        self.gguf_writer.add_tokenizer_pre("default")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
         self.gguf_writer.add_add_space_prefix(False)
 
     def set_gguf_parameters(self):
@@ -2414,6 +2437,12 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_final_logit_softcapping(
             self.hparams["final_logit_softcapping"]
         )
+        self.gguf_writer.add_sliding_window(self.hparams["sliding_window"])
+
+        # sanity check
+        attn_scalar = self.hparams["query_pre_attn_scalar"]
+        if attn_scalar != hparams["hidden_size"] / hparams["num_attention_heads"]:
+            raise ValueError("query_pre_attn_scalar must be equal to n_embd / n_head")
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
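The new sanity check ties Gemma-2's `query_pre_attn_scalar` to the per-head dimension, so conversion fails fast if a config uses a scaling that the standard `n_embd / n_head` attention scale would not reproduce. A standalone check with illustrative numbers (assumed, not read from any particular checkpoint):

```python
hparams = {
    "hidden_size": 3584,           # assumed example values for a Gemma-2-style config
    "num_attention_heads": 16,
    "query_pre_attn_scalar": 224,
}

attn_scalar = hparams["query_pre_attn_scalar"]
if attn_scalar != hparams["hidden_size"] / hparams["num_attention_heads"]:
    raise ValueError("query_pre_attn_scalar must be equal to n_embd / n_head")
print("ok:", attn_scalar)  # 3584 / 16 == 224, so the check passes
```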
@@ -3031,6 +3060,96 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]
 
 
+@Model.register("JAISLMHeadModel")
+class JaisModel(Model):
+    model_arch = gguf.MODEL_ARCH.JAIS
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # SwiGLU activation
+        assert self.hparams["activation_function"] == "swiglu"
+        # ALiBi position embedding
+        assert self.hparams["position_embedding_type"] == "alibi"
+
+        # Embeddings scale
+        self.embeddings_scale = 1.0
+        # note: For some JAIS flavors, output is tied to (same as) wte in original model
+        self.output_is_wte = False
+        if 'mup_embeddings_scale' in self.hparams:
+            self.output_is_wte = True  # Hack (?)
+            self.embeddings_scale = self.hparams['mup_embeddings_scale']
+        elif 'embeddings_scale' in self.hparams:
+            self.embeddings_scale = self.hparams['embeddings_scale']
+        else:
+            assert False
+
+        self.width_scale = 1.0
+        if 'mup_output_alpha' in self.hparams:
+            assert 'mup_width_scale' in self.hparams
+            self.width_scale = self.hparams['mup_output_alpha'] * self.hparams['mup_width_scale']
+        elif 'width_scale' in self.hparams:
+            self.width_scale = self.hparams['width_scale']
+        else:
+            assert False
+
+        self.max_alibi_bias = 8.0
+
+    def set_vocab(self):
+        self._set_vocab_gpt2()
+
+    def set_gguf_parameters(self):
+        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_block_count(self.hparams["n_layer"])
+        self.gguf_writer.add_context_length(self.hparams["n_positions"])
+        self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
+        self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
+        self.gguf_writer.add_head_count(self.hparams["n_head"])
+        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+        self.gguf_writer.add_file_type(self.ftype)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        tensors: list[tuple[str, Tensor]] = []
+
+        # we don't need these
+        if name.endswith((".attn.bias")):
+            return tensors
+
+        if name.endswith(("relative_pe.slopes")):
+            # Calculate max ALiBi bias (this is the inverse of the ALiBi calculation)
+            # Some other models have max_alibi_bias spelled out explicitly in the hyperparams,
+            # but Jais's PyTorch model simply precalculates the slope values and places them
+            # in relative_pes.slopes
+            n_head_closest_log2 = 2 ** math.floor(math.log2(self.hparams["n_head"]))
+            first_val = float(data_torch._data[0])
+            self.max_alibi_bias = -round(math.log2(first_val) * n_head_closest_log2)
+
+            return tensors
+
+        if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
+            data_torch = data_torch.transpose(1, 0)
+
+        new_name = self.map_tensor_name(name)
+
+        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
+            tensors.append((new_name, data_torch * self.embeddings_scale))
+            if self.output_is_wte:
+                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
+        elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
+            assert not self.output_is_wte
+            tensors.append((new_name, data_torch * self.width_scale))
+        else:
+            tensors.append((new_name, data_torch))
+
+        return tensors
+
+    def write_tensors(self):
+        super().write_tensors()
+        self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
+
+
 ###### CONVERSION LOGIC ######
 
 
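The `relative_pe.slopes` branch recovers `max_alibi_bias` from precomputed ALiBi slopes: assuming the common scheme where, for `n` heads (rounded down to a power of two), slope `i` is `2 ** (-max_bias * (i + 1) / n)`, the first slope alone determines `max_bias = -log2(slope_0) * n`. A quick round-trip under that assumption:

```python
import math

n_head = 32              # assumed head count
max_alibi_bias = 8.0     # value we expect to recover

n = 2 ** math.floor(math.log2(n_head))  # closest power of two <= n_head
slopes = [2.0 ** (-max_alibi_bias * (i + 1) / n) for i in range(n)]

# Invert the slope formula from the first slope, as JaisModel.modify_tensors does.
recovered = -round(math.log2(slopes[0]) * n)
print(recovered)  # 8
```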
@@ -3186,7 +3305,8 @@ def main() -> None:
         "auto": gguf.LlamaFileType.GUESSED,
     }
 
-    if args.use_temp_file and (args.split_max_tensors > 0 or args.split_max_size != "0"):
+    is_split = args.split_max_tensors > 0 or args.split_max_size != "0"
+    if args.use_temp_file and is_split:
         logger.error("Error: Cannot use temp file when splitting")
         sys.exit(1)
 
@@ -3223,11 +3343,12 @@ def main() -> None:
         if args.vocab_only:
             logger.info("Exporting model vocab...")
             model_instance.write_vocab()
-            logger.info("Model vocab successfully exported.")
+            logger.info(f"Model vocab successfully exported to {model_instance.fname_out}")
         else:
             logger.info("Exporting model...")
             model_instance.write()
-            logger.info("Model successfully exported.")
+            out_path = f"{model_instance.fname_out.parent}{os.sep}" if is_split else model_instance.fname_out
+            logger.info(f"Model successfully exported to {out_path}")
 
 
 if __name__ == '__main__':