Commit 39362f3

gguf.py : pick some of the refactoring from #2644

1 parent 673ae1a · commit 39362f3

2 files changed: +63, -68 lines

convert-new.py

Lines changed: 9 additions & 11 deletions
@@ -705,19 +705,17 @@ def check_vocab_size(params: Params, vocab: Vocab) -> None:
 
 class OutputFile:
     def __init__(self, fname_out: Path) -> None:
-        self.gguf = gguf.GGUFWriter.open(fname_out)
+        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
 
     def add_meta_arch(self, params: Params) -> None:
-        arch = gguf.MODEL_ARCH_NAMES[ARCH]
-        self.gguf.add_architecture        (arch)
-        self.gguf.add_context_length      (arch, params.n_ctx)
-        self.gguf.add_embedding_length    (arch, params.n_embd)
-        self.gguf.add_block_count         (arch, params.n_layer)
-        self.gguf.add_feed_forward_length (arch, params.n_ff)
-        self.gguf.add_rope_dimension_count(arch, params.n_embd // params.n_head)
-        self.gguf.add_head_count          (arch, params.n_head)
-        self.gguf.add_head_count_kv       (arch, params.n_head_kv)
-        self.gguf.add_layer_norm_rms_eps  (arch, params.f_norm_eps)
+        self.gguf.add_context_length      (params.n_ctx)
+        self.gguf.add_embedding_length    (params.n_embd)
+        self.gguf.add_block_count         (params.n_layer)
+        self.gguf.add_feed_forward_length (params.n_ff)
+        self.gguf.add_rope_dimension_count(params.n_embd // params.n_head)
+        self.gguf.add_head_count          (params.n_head)
+        self.gguf.add_head_count_kv       (params.n_head_kv)
+        self.gguf.add_layer_norm_rms_eps  (params.f_norm_eps)
 
     def add_meta_vocab(self, vocab: Vocab) -> None:
         tokens = []
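
In practice, the caller-side change above means the architecture name is handed to the writer once and never repeated per field. A minimal sketch of the new calling convention (the output path and parameter values are hypothetical, for illustration only):

    import gguf

    # the architecture name is bound once, at construction time
    writer = gguf.GGUFWriter("model-f16.gguf", "llama")

    # per-architecture metadata setters no longer take an arch argument
    writer.add_context_length(2048)
    writer.add_embedding_length(4096)
    writer.add_head_count(32)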

gguf.py

Lines changed: 54 additions & 57 deletions
@@ -33,24 +33,24 @@
 KEY_GENERAL_SOURCE_HF_REPO = "general.source.hugginface.repository"
 
 # LLM
-KEY_LLM_CONTEXT_LENGTH        = "{llm}.context_length"
-KEY_LLM_EMBEDDING_LENGTH      = "{llm}.embedding_length"
-KEY_LLM_BLOCK_COUNT           = "{llm}.block_count"
-KEY_LLM_FEED_FORWARD_LENGTH   = "{llm}.feed_forward_length"
-KEY_LLM_USE_PARALLEL_RESIDUAL = "{llm}.use_parallel_residual"
-KEY_LLM_TENSOR_DATA_LAYOUT    = "{llm}.tensor_data_layout"
+KEY_LLM_CONTEXT_LENGTH        = "{arch}.context_length"
+KEY_LLM_EMBEDDING_LENGTH      = "{arch}.embedding_length"
+KEY_LLM_BLOCK_COUNT           = "{arch}.block_count"
+KEY_LLM_FEED_FORWARD_LENGTH   = "{arch}.feed_forward_length"
+KEY_LLM_USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
+KEY_LLM_TENSOR_DATA_LAYOUT    = "{arch}.tensor_data_layout"
 
 # attention
-KEY_ATTENTION_HEAD_COUNT        = "{llm}.attention.head_count"
-KEY_ATTENTION_HEAD_COUNT_KV     = "{llm}.attention.head_count_kv"
-KEY_ATTENTION_MAX_ALIBI_BIAS    = "{llm}.attention.max_alibi_bias"
-KEY_ATTENTION_CLAMP_KQV         = "{llm}.attention.clamp_kqv"
-KEY_ATTENTION_LAYERNORM_EPS     = "{llm}.attention.layer_norm_epsilon"
-KEY_ATTENTION_LAYERNORM_RMS_EPS = "{llm}.attention.layer_norm_rms_epsilon"
+KEY_ATTENTION_HEAD_COUNT        = "{arch}.attention.head_count"
+KEY_ATTENTION_HEAD_COUNT_KV     = "{arch}.attention.head_count_kv"
+KEY_ATTENTION_MAX_ALIBI_BIAS    = "{arch}.attention.max_alibi_bias"
+KEY_ATTENTION_CLAMP_KQV         = "{arch}.attention.clamp_kqv"
+KEY_ATTENTION_LAYERNORM_EPS     = "{arch}.attention.layer_norm_epsilon"
+KEY_ATTENTION_LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
 
 # RoPE
-KEY_ROPE_DIMENSION_COUNT = "{llm}.rope.dimension_count"
-KEY_ROPE_SCALE           = "{llm}.rope.scale"
+KEY_ROPE_DIMENSION_COUNT = "{arch}.rope.dimension_count"
+KEY_ROPE_SCALE           = "{arch}.rope.scale"
 
 # tokenization
 KEY_TOKENIZER_MODEL = "tokenizer.ggml.model"
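
The renamed placeholder only changes the keyword that .format() expects; the resulting GGUF keys are unchanged. A quick sketch of how the templates expand (the arch value is illustrative):

    # with arch = "llama", the templates above expand to keys such as:
    KEY_LLM_CONTEXT_LENGTH.format(arch="llama")      # -> "llama.context_length"
    KEY_ATTENTION_HEAD_COUNT.format(arch="llama")    # -> "llama.attention.head_count"
    KEY_ROPE_DIMENSION_COUNT.format(arch="llama")    # -> "llama.rope.dimension_count"
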
@@ -343,14 +343,16 @@ def get_type(val):
 
 
 class GGUFWriter:
-    def __init__(self, fout: IO):
-        self.fout = fout
+    def __init__(self, path: str, arch: str):
+        self.fout = open(path, "wb")
+        self.arch = arch
         self.offset_tensor = 0
         self.data_alignment = GGUF_DEFAULT_ALIGNMENT
         self.kv_data = b""
         self.kv_data_count = 0
         self.ti_data = b""
         self.ti_data_count = 0
+        self.add_architecture()
 
     def write_header_to_file(self):
         self.fout.write(struct.pack("<I", GGUF_MAGIC))
@@ -368,11 +370,6 @@ def write_ti_data_to_file(self):
         self.fout.write(self.ti_data)
         self.flush()
 
-    @classmethod
-    def open(cls, path: str) -> "GGUFWriter":
-        f = open(path, "wb")
-        return cls(f)
-
     def add_key(self, key: str):
         self.add_val(key, GGUFValueType.STRING, add_vtype=False)
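
With the constructor now opening the file and recording the architecture itself (writing general.architecture immediately), the open() factory removed above becomes redundant. A before/after sketch of constructing a writer (the path is hypothetical):

    # before this commit
    writer = GGUFWriter.open("out.gguf")
    writer.add_architecture("llama")

    # after this commit
    writer = GGUFWriter("out.gguf", "llama")   # add_architecture() is called internally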

@@ -409,7 +406,8 @@ def add_bool(self, key: str, val: bool):
         self.add_val(val, GGUFValueType.BOOL)
 
     def add_string(self, key: str, val: str):
-        if len(val) == 0: return
+        if len(val) == 0:
+            return
         self.add_key(key)
         self.add_val(val, GGUFValueType.STRING)
 
@@ -463,6 +461,8 @@ def ggml_pad(x: int, n: int) -> int:
         return ((x + n - 1) // n) * n
 
     def add_tensor_info(self, name: str, tensor_shape: np.ndarray, tensor_dtype: np.dtype, tensor_nbytes: int):
+        assert tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
+
         encoded_name = name.encode("utf8")
         self.ti_data += struct.pack("<I", len(encoded_name))
         self.ti_data += encoded_name
@@ -471,7 +471,6 @@ def add_tensor_info(self, name: str, tensor_shape: np.ndarray, tensor_dtype: np.
         for i in range(n_dims):
             self.ti_data += struct.pack("<I", tensor_shape[n_dims - 1 - i])
 
-        assert tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
         dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
         self.ti_data += struct.pack("<I", dtype)
         self.ti_data += struct.pack("<Q", self.offset_tensor)
@@ -495,15 +494,14 @@ def flush(self):
     def close(self):
         self.fout.close()
 
-    def add_architecture(self, architecture: str):
-        self.add_string(KEY_GENERAL_ARCHITECTURE,
-                        architecture)
+    def add_architecture(self):
+        self.add_string(KEY_GENERAL_ARCHITECTURE, self.arch)
 
     def add_author(self, author: str):
         self.add_string(KEY_GENERAL_AUTHOR, author)
 
     def add_tensor_data_layout(self, layout: str):
-        self.add_string(KEY_LLM_TENSOR_DATA_LAYOUT , layout)
+        self.add_string(KEY_LLM_TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
 
     def add_url(self, url: str):
         self.add_string(KEY_GENERAL_URL, url)
@@ -531,60 +529,60 @@ def add_custom_alignment(self, alignment: int):
         self.data_alignment = alignment
         self.add_uint32(KEY_GENERAL_ALIGNMENT, alignment)
 
-    def add_context_length(self, llm: str, length: int):
+    def add_context_length(self, length: int):
         self.add_uint32(
-            KEY_LLM_CONTEXT_LENGTH.format(llm=llm), length)
+            KEY_LLM_CONTEXT_LENGTH.format(arch=self.arch), length)
 
-    def add_embedding_length(self, llm: str, length: int):
+    def add_embedding_length(self, length: int):
         self.add_uint32(
-            KEY_LLM_EMBEDDING_LENGTH.format(llm=llm), length)
+            KEY_LLM_EMBEDDING_LENGTH.format(arch=self.arch), length)
 
-    def add_block_count(self, llm: str, length: int):
+    def add_block_count(self, length: int):
         self.add_uint32(
-            KEY_LLM_BLOCK_COUNT.format(llm=llm), length)
+            KEY_LLM_BLOCK_COUNT.format(arch=self.arch), length)
 
-    def add_feed_forward_length(self, llm: str, length: int):
+    def add_feed_forward_length(self, length: int):
         self.add_uint32(
-            KEY_LLM_FEED_FORWARD_LENGTH.format(llm=llm), length)
+            KEY_LLM_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
 
-    def add_parallel_residual(self, llm: str, use: bool):
+    def add_parallel_residual(self, use: bool):
         self.add_bool(
-            KEY_LLM_USE_PARALLEL_RESIDUAL.format(llm=llm), use)
+            KEY_LLM_USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
 
-    def add_tensor_data_layout(self, llm: str, layout: str):
+    def add_tensor_data_layout(self, layout: str):
         self.add_string(
-            KEY_LLM_TENSOR_DATA_LAYOUT.format(llm=llm), layout)
+            KEY_LLM_TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
 
-    def add_head_count(self, llm: str, count: int):
+    def add_head_count(self, count: int):
         self.add_uint32(
-            KEY_ATTENTION_HEAD_COUNT.format(llm=llm), count)
+            KEY_ATTENTION_HEAD_COUNT.format(arch=self.arch), count)
 
-    def add_head_count_kv(self, llm: str, count: int):
+    def add_head_count_kv(self, count: int):
         self.add_uint32(
-            KEY_ATTENTION_HEAD_COUNT_KV.format(llm=llm), count)
+            KEY_ATTENTION_HEAD_COUNT_KV.format(arch=self.arch), count)
 
-    def add_max_alibi_bias(self, llm: str, bias: float):
+    def add_max_alibi_bias(self, bias: float):
         self.add_float32(
-            KEY_ATTENTION_MAX_ALIBI_BIAS.format(llm=llm), bias)
+            KEY_ATTENTION_MAX_ALIBI_BIAS.format(arch=self.arch), bias)
 
-    def add_clamp_kqv(self, llm: str, value: float):
+    def add_clamp_kqv(self, value: float):
         self.add_float32(
-            KEY_ATTENTION_CLAMP_KQV.format(llm=llm), value)
+            KEY_ATTENTION_CLAMP_KQV.format(arch=self.arch), value)
 
-    def add_layer_norm_eps(self, llm: str, value: float):
+    def add_layer_norm_eps(self, value: float):
         self.add_float32(
-            KEY_ATTENTION_LAYERNORM_EPS.format(llm=llm), value)
+            KEY_ATTENTION_LAYERNORM_EPS.format(arch=self.arch), value)
 
-    def add_layer_norm_rms_eps(self, llm: str, value: float):
+    def add_layer_norm_rms_eps(self, value: float):
         self.add_float32(
-            KEY_ATTENTION_LAYERNORM_RMS_EPS.format(llm=llm), value)
+            KEY_ATTENTION_LAYERNORM_RMS_EPS.format(arch=self.arch), value)
 
-    def add_rope_dimension_count(self, llm: str, count: int):
+    def add_rope_dimension_count(self, count: int):
         self.add_uint32(
-            KEY_ROPE_DIMENSION_COUNT.format(llm=llm), count)
+            KEY_ROPE_DIMENSION_COUNT.format(arch=self.arch), count)
 
-    def add_rope_scale(self, llm: str, value: float):
-        self.add_float32(KEY_ROPE_SCALE.format(llm=llm), value)
+    def add_rope_scale(self, value: float):
+        self.add_float32(KEY_ROPE_SCALE.format(arch=self.arch), value)
 
     def add_tokenizer_model(self, model: str):
         self.add_string(KEY_TOKENIZER_MODEL, model)
@@ -619,9 +617,8 @@ def add_pad_token_id(self, id: int):
 # Example usage:
 if __name__ == "__main__":
     # Example usage with a file
-    gguf_writer = GGUFWriter.open("example.gguf")
+    gguf_writer = GGUFWriter("example.gguf", "llama")
 
-    gguf_writer.add_architecture("llama")
     gguf_writer.add_uint32("answer", 42) # Write a 32-bit integer
     gguf_writer.add_float32("answer_in_float", 42.0) # Write a 32-bit float
     gguf_writer.add_custom_alignment(64)
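
The rest of the example is unchanged by this commit: after the key/value pairs are added, the accumulated data still has to be written out and the file closed. A rough sketch of the tail end, using only methods that appear elsewhere in this diff (the exact call order in the full file may differ):

    gguf_writer.write_header_to_file()   # magic, version, counts
    # ... the KV and tensor-info sections are written by the writer's other
    # write_* methods, which are not shown in this diff
    gguf_writer.flush()
    gguf_writer.close()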
