 KEY_GENERAL_SOURCE_HF_REPO = "general.source.hugginface.repository"

 # LLM
-KEY_LLM_CONTEXT_LENGTH        = "{llm}.context_length"
-KEY_LLM_EMBEDDING_LENGTH      = "{llm}.embedding_length"
-KEY_LLM_BLOCK_COUNT           = "{llm}.block_count"
-KEY_LLM_FEED_FORWARD_LENGTH   = "{llm}.feed_forward_length"
-KEY_LLM_USE_PARALLEL_RESIDUAL = "{llm}.use_parallel_residual"
-KEY_LLM_TENSOR_DATA_LAYOUT    = "{llm}.tensor_data_layout"
+KEY_LLM_CONTEXT_LENGTH        = "{arch}.context_length"
+KEY_LLM_EMBEDDING_LENGTH      = "{arch}.embedding_length"
+KEY_LLM_BLOCK_COUNT           = "{arch}.block_count"
+KEY_LLM_FEED_FORWARD_LENGTH   = "{arch}.feed_forward_length"
+KEY_LLM_USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
+KEY_LLM_TENSOR_DATA_LAYOUT    = "{arch}.tensor_data_layout"

 # attention
-KEY_ATTENTION_HEAD_COUNT        = "{llm}.attention.head_count"
-KEY_ATTENTION_HEAD_COUNT_KV     = "{llm}.attention.head_count_kv"
-KEY_ATTENTION_MAX_ALIBI_BIAS    = "{llm}.attention.max_alibi_bias"
-KEY_ATTENTION_CLAMP_KQV         = "{llm}.attention.clamp_kqv"
-KEY_ATTENTION_LAYERNORM_EPS     = "{llm}.attention.layer_norm_epsilon"
-KEY_ATTENTION_LAYERNORM_RMS_EPS = "{llm}.attention.layer_norm_rms_epsilon"
+KEY_ATTENTION_HEAD_COUNT        = "{arch}.attention.head_count"
+KEY_ATTENTION_HEAD_COUNT_KV     = "{arch}.attention.head_count_kv"
+KEY_ATTENTION_MAX_ALIBI_BIAS    = "{arch}.attention.max_alibi_bias"
+KEY_ATTENTION_CLAMP_KQV         = "{arch}.attention.clamp_kqv"
+KEY_ATTENTION_LAYERNORM_EPS     = "{arch}.attention.layer_norm_epsilon"
+KEY_ATTENTION_LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"

 # RoPE
-KEY_ROPE_DIMENSION_COUNT = "{llm}.rope.dimension_count"
-KEY_ROPE_SCALE           = "{llm}.rope.scale"
+KEY_ROPE_DIMENSION_COUNT = "{arch}.rope.dimension_count"
+KEY_ROPE_SCALE           = "{arch}.rope.scale"

 # tokenization
 KEY_TOKENIZER_MODEL = "tokenizer.ggml.model"
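Aside (not part of the diff): these constants are key templates, and the renamed {arch} placeholder is filled in with str.format, which is what the writer methods further down do via format(arch=self.arch). A minimal Python sketch, using "llama" purely as an illustrative architecture name:

    KEY_LLM_CONTEXT_LENGTH = "{arch}.context_length"
    # Expand the template into a concrete per-architecture metadata key.
    print(KEY_LLM_CONTEXT_LENGTH.format(arch="llama"))  # prints: llama.context_length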
@@ -343,14 +343,16 @@ def get_type(val):


 class GGUFWriter:
-    def __init__(self, fout: IO):
-        self.fout = fout
+    def __init__(self, path: str, arch: str):
+        self.fout = open(path, "wb")
+        self.arch = arch
         self.offset_tensor = 0
         self.data_alignment = GGUF_DEFAULT_ALIGNMENT
         self.kv_data = b""
         self.kv_data_count = 0
         self.ti_data = b""
         self.ti_data_count = 0
+        self.add_architecture()

     def write_header_to_file(self):
         self.fout.write(struct.pack("<I", GGUF_MAGIC))
@@ -368,11 +370,6 @@ def write_ti_data_to_file(self):
         self.fout.write(self.ti_data)
         self.flush()

-    @classmethod
-    def open(cls, path: str) -> "GGUFWriter":
-        f = open(path, "wb")
-        return cls(f)
-
     def add_key(self, key: str):
         self.add_val(key, GGUFValueType.STRING, add_vtype=False)

@@ -409,7 +406,8 @@ def add_bool(self, key: str, val: bool):
         self.add_val(val, GGUFValueType.BOOL)

     def add_string(self, key: str, val: str):
-        if len(val) == 0: return
+        if len(val) == 0:
+            return
         self.add_key(key)
         self.add_val(val, GGUFValueType.STRING)

@@ -463,6 +461,8 @@ def ggml_pad(x: int, n: int) -> int:
         return ((x + n - 1) // n) * n

     def add_tensor_info(self, name: str, tensor_shape: np.ndarray, tensor_dtype: np.dtype, tensor_nbytes: int):
+        assert tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
+
         encoded_name = name.encode("utf8")
         self.ti_data += struct.pack("<I", len(encoded_name))
         self.ti_data += encoded_name
@@ -471,7 +471,6 @@ def add_tensor_info(self, name: str, tensor_shape: np.ndarray, tensor_dtype: np.
         for i in range(n_dims):
             self.ti_data += struct.pack("<I", tensor_shape[n_dims - 1 - i])

-        assert tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
         dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
         self.ti_data += struct.pack("<I", dtype)
         self.ti_data += struct.pack("<Q", self.offset_tensor)
@@ -495,15 +494,14 @@ def flush(self):
     def close(self):
         self.fout.close()

-    def add_architecture(self, architecture: str):
-        self.add_string(KEY_GENERAL_ARCHITECTURE,
-                        architecture)
+    def add_architecture(self):
+        self.add_string(KEY_GENERAL_ARCHITECTURE, self.arch)

     def add_author(self, author: str):
         self.add_string(KEY_GENERAL_AUTHOR, author)

     def add_tensor_data_layout(self, layout: str):
-        self.add_string(KEY_LLM_TENSOR_DATA_LAYOUT, layout)
+        self.add_string(KEY_LLM_TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)

     def add_url(self, url: str):
         self.add_string(KEY_GENERAL_URL, url)
@@ -531,60 +529,60 @@ def add_custom_alignment(self, alignment: int):
         self.data_alignment = alignment
         self.add_uint32(KEY_GENERAL_ALIGNMENT, alignment)

-    def add_context_length(self, llm: str, length: int):
+    def add_context_length(self, length: int):
         self.add_uint32(
-            KEY_LLM_CONTEXT_LENGTH.format(llm=llm), length)
+            KEY_LLM_CONTEXT_LENGTH.format(arch=self.arch), length)

-    def add_embedding_length(self, llm: str, length: int):
+    def add_embedding_length(self, length: int):
         self.add_uint32(
-            KEY_LLM_EMBEDDING_LENGTH.format(llm=llm), length)
+            KEY_LLM_EMBEDDING_LENGTH.format(arch=self.arch), length)

-    def add_block_count(self, llm: str, length: int):
+    def add_block_count(self, length: int):
         self.add_uint32(
-            KEY_LLM_BLOCK_COUNT.format(llm=llm), length)
+            KEY_LLM_BLOCK_COUNT.format(arch=self.arch), length)

-    def add_feed_forward_length(self, llm: str, length: int):
+    def add_feed_forward_length(self, length: int):
         self.add_uint32(
-            KEY_LLM_FEED_FORWARD_LENGTH.format(llm=llm), length)
+            KEY_LLM_FEED_FORWARD_LENGTH.format(arch=self.arch), length)

-    def add_parallel_residual(self, llm: str, use: bool):
+    def add_parallel_residual(self, use: bool):
         self.add_bool(
-            KEY_LLM_USE_PARALLEL_RESIDUAL.format(llm=llm), use)
+            KEY_LLM_USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)

-    def add_tensor_data_layout(self, llm: str, layout: str):
+    def add_tensor_data_layout(self, layout: str):
         self.add_string(
-            KEY_LLM_TENSOR_DATA_LAYOUT.format(llm=llm), layout)
+            KEY_LLM_TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)

-    def add_head_count(self, llm: str, count: int):
+    def add_head_count(self, count: int):
         self.add_uint32(
-            KEY_ATTENTION_HEAD_COUNT.format(llm=llm), count)
+            KEY_ATTENTION_HEAD_COUNT.format(arch=self.arch), count)

-    def add_head_count_kv(self, llm: str, count: int):
+    def add_head_count_kv(self, count: int):
         self.add_uint32(
-            KEY_ATTENTION_HEAD_COUNT_KV.format(llm=llm), count)
+            KEY_ATTENTION_HEAD_COUNT_KV.format(arch=self.arch), count)

-    def add_max_alibi_bias(self, llm: str, bias: float):
+    def add_max_alibi_bias(self, bias: float):
         self.add_float32(
-            KEY_ATTENTION_MAX_ALIBI_BIAS.format(llm=llm), bias)
+            KEY_ATTENTION_MAX_ALIBI_BIAS.format(arch=self.arch), bias)

-    def add_clamp_kqv(self, llm: str, value: float):
+    def add_clamp_kqv(self, value: float):
         self.add_float32(
-            KEY_ATTENTION_CLAMP_KQV.format(llm=llm), value)
+            KEY_ATTENTION_CLAMP_KQV.format(arch=self.arch), value)

-    def add_layer_norm_eps(self, llm: str, value: float):
+    def add_layer_norm_eps(self, value: float):
         self.add_float32(
-            KEY_ATTENTION_LAYERNORM_EPS.format(llm=llm), value)
+            KEY_ATTENTION_LAYERNORM_EPS.format(arch=self.arch), value)

-    def add_layer_norm_rms_eps(self, llm: str, value: float):
+    def add_layer_norm_rms_eps(self, value: float):
         self.add_float32(
-            KEY_ATTENTION_LAYERNORM_RMS_EPS.format(llm=llm), value)
+            KEY_ATTENTION_LAYERNORM_RMS_EPS.format(arch=self.arch), value)

-    def add_rope_dimension_count(self, llm: str, count: int):
+    def add_rope_dimension_count(self, count: int):
         self.add_uint32(
-            KEY_ROPE_DIMENSION_COUNT.format(llm=llm), count)
+            KEY_ROPE_DIMENSION_COUNT.format(arch=self.arch), count)

-    def add_rope_scale(self, llm: str, value: float):
-        self.add_float32(KEY_ROPE_SCALE.format(llm=llm), value)
+    def add_rope_scale(self, value: float):
+        self.add_float32(KEY_ROPE_SCALE.format(arch=self.arch), value)

     def add_tokenizer_model(self, model: str):
         self.add_string(KEY_TOKENIZER_MODEL, model)
@@ -619,9 +617,8 @@ def add_pad_token_id(self, id: int):
 # Example usage:
 if __name__ == "__main__":
     # Example usage with a file
-    gguf_writer = GGUFWriter.open("example.gguf")
+    gguf_writer = GGUFWriter("example.gguf", "llama")

-    gguf_writer.add_architecture("llama")
     gguf_writer.add_uint32("answer", 42)  # Write a 32-bit integer
     gguf_writer.add_float32("answer_in_float", 42.0)  # Write a 32-bit float
     gguf_writer.add_custom_alignment(64)
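Note on the resulting API (a minimal sketch, not part of the diff; the numeric values are placeholders): the writer is now constructed with the output path and the architecture name, the constructor opens the file and records the architecture itself, and the per-architecture setters drop the llm argument because the keys are formatted with self.arch internally.

    # Hypothetical usage of the updated GGUFWriter; values are illustrative only.
    gguf_writer = GGUFWriter("example.gguf", "llama")  # opens the file and calls add_architecture()
    gguf_writer.add_context_length(2048)               # key expands to "llama.context_length"
    gguf_writer.add_head_count(32)                     # key expands to "llama.attention.head_count"
    # ... remaining metadata/tensors, then the existing write_*_to_file calls ...
    gguf_writer.close()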