Commit 11ef380

GGUF : write tensor (#2426)
* WIP: Write tensor
* GGUF : Support writing tensors in Python
* refactor : rm unused import and upd todos
* fix : fix errors upd writing example
* rm example.gguf
* gitignore *.gguf
* undo formatting
1 parent d2bb3ac commit 11ef380
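
The heart of the change is alignment-aware tensor placement: write_tensor_info records each tensor's offset in the data section, rounded up to GGUF_DEFAULT_ALIGNMENT by the new ggml_pad helper. A minimal sketch (not part of the commit) of the offset arithmetic for the two tensors written in the __main__ example below; the local ggml_pad mirrors the helper the commit adds:

    # Offset math for the example tensors, assuming GGUF_DEFAULT_ALIGNMENT = 32.
    def ggml_pad(x: int, n: int) -> int:
        # Round x up to the next multiple of n (mirrors GGUFWriter.ggml_pad).
        return ((x + n - 1) // n) * n

    tensor1_nbytes = 7 * 10 * 4   # (7, 10) float32 -> 280 bytes
    tensor2_nbytes = 16 * 12 * 2  # (16, 12) float16 -> 384 bytes

    offset1 = 0                             # first tensor opens the data section
    offset2 = ggml_pad(tensor1_nbytes, 32)  # 280 rounds up to 288

    print(offset1, offset2)  # 0 288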

File tree

3 files changed: +73 −36 lines

.gitignore
Lines changed: 1 addition & 0 deletions

@@ -1,6 +1,7 @@
 *.o
 *.a
 *.so
+*.gguf
 .DS_Store
 .build/
 .cache/

constants.py
Lines changed: 3 additions & 2 deletions

@@ -1,5 +1,6 @@
-GGUF_MAGIC = 0x47475546
-GGUF_VERSION = 1
+GGUF_MAGIC             = 0x47475546
+GGUF_VERSION           = 1
+GGUF_DEFAULT_ALIGNMENT = 32
 
 # general
 KEY_GENERAL_ARCHITECTURE = "general.architecture"
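
For orientation, a minimal sketch (not part of the commit) of how these constants reach disk: GGUFWriter.write_header in gguf.py below emits them as consecutive little-endian uint32 fields, so a single struct format string reproduces the header byte-for-byte:

    import struct

    GGUF_MAGIC   = 0x47475546
    GGUF_VERSION = 1

    # Same bytes write_header() produces: magic, version,
    # tensor count, metadata KV count, each little-endian uint32.
    header = struct.pack("<IIII", GGUF_MAGIC, GGUF_VERSION, 2, 3)
    magic, version, n_tensors, n_kv = struct.unpack("<IIII", header)
    assert (magic, version, n_tensors, n_kv) == (GGUF_MAGIC, GGUF_VERSION, 2, 3)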

gguf.py
Lines changed: 69 additions & 34 deletions

@@ -1,14 +1,16 @@
 """TODOs
-1. Implement writing tensor data with alignment.
-2. Implement writers for known architectures, LLaMA in particular.
-3. Add docstrings from the format specs.
-4. After development is done, Convert it to a proper pip-installable Python package, and possibly move it to its own repo under ggml-org.
+1. Implement writers for known architectures, LLaMA in particular.
+2. Add docstrings from the format specs.
+3. After development is done, Convert it to a proper pip-installable Python package, and possibly move it to its own repo under ggml-org.
 """
 
 import struct
 import constants
 from enum import IntEnum
-from typing import List, Any
+from typing import Any, IO, List
+
+import numpy as np
+
 
 class GGMLQuantizationType(IntEnum):
     F32 = 0
@@ -54,15 +56,18 @@ def get_type(val):
         else:
             return GGUFValueType.INT32
 
+
 class GGUFWriter:
-    def __init__(self, buffered_writer):
-        self.buffered_writer = buffered_writer
+    def __init__(self, fout: IO):
+        self.fout = fout
+        self.offset_tensor = 0
+        self.tensors: List[np.ndarray] = []
 
     def write_header(self, tensor_count: int, metadata_kv_count: int):
-        self.buffered_writer.write(struct.pack("<I", constants.GGUF_MAGIC))
-        self.buffered_writer.write(struct.pack("<I", constants.GGUF_VERSION))
-        self.buffered_writer.write(struct.pack("<I", tensor_count))
-        self.buffered_writer.write(struct.pack("<I", metadata_kv_count))
+        self.fout.write(struct.pack("<I", constants.GGUF_MAGIC))
+        self.fout.write(struct.pack("<I", constants.GGUF_VERSION))
+        self.fout.write(struct.pack("<I", tensor_count))
+        self.fout.write(struct.pack("<I", metadata_kv_count))
 
     @classmethod
     def open(cls, path: str) -> "GGUFWriter":
@@ -119,40 +124,69 @@ def write_val(self: str, val: Any, vtype: GGUFValueType = None):
         if vtype is None:
             vtype = GGUFValueType.get_type(val)
 
-        self.buffered_writer.write(struct.pack("<I", vtype))
+        self.fout.write(struct.pack("<I", vtype))
 
         if vtype == GGUFValueType.UINT8:
-            self.buffered_writer.write(struct.pack("<B", val))
+            self.fout.write(struct.pack("<B", val))
         elif vtype == GGUFValueType.INT8:
-            self.buffered_writer.write(struct.pack("<b", val))
+            self.fout.write(struct.pack("<b", val))
         elif vtype == GGUFValueType.UINT16:
-            self.buffered_writer.write(struct.pack("<H", val))
+            self.fout.write(struct.pack("<H", val))
         elif vtype == GGUFValueType.INT16:
-            self.buffered_writer.write(struct.pack("<h", val))
+            self.fout.write(struct.pack("<h", val))
         elif vtype == GGUFValueType.UINT32:
-            self.buffered_writer.write(struct.pack("<I", val))
+            self.fout.write(struct.pack("<I", val))
         elif vtype == GGUFValueType.INT32:
-            self.buffered_writer.write(struct.pack("<i", val))
+            self.fout.write(struct.pack("<i", val))
         elif vtype == GGUFValueType.FLOAT32:
-            self.buffered_writer.write(struct.pack("<f", val))
+            self.fout.write(struct.pack("<f", val))
         elif vtype == GGUFValueType.BOOL:
-            self.buffered_writer.write(struct.pack("?", val))
+            self.fout.write(struct.pack("?", val))
         elif vtype == GGUFValueType.STRING:
             encoded_val = val.encode("utf8")
-            self.buffered_writer.write(struct.pack("<I", len(encoded_val)))
-            self.buffered_writer.write(encoded_val)
+            self.fout.write(struct.pack("<I", len(encoded_val)))
+            self.fout.write(encoded_val)
         elif vtype == GGUFValueType.ARRAY:
-            self.buffered_writer.write(struct.pack("<I", len(val)))
+            self.fout.write(struct.pack("<I", len(val)))
             for item in val:
                 self.write_val(item)
         else:
             raise ValueError("Invalid GGUF metadata value type")
 
+    @staticmethod
+    def ggml_pad(x: int, n: int) -> int:
+        return ((x + n - 1) // n) * n
+
+    def write_tensor_info(self, name: str, tensor: np.ndarray):
+        self.write_val(name, GGUFValueType.STRING)
+        n_dims = len(tensor.shape)
+        self.write_val(n_dims, GGUFValueType.INT32)
+        for i in range(n_dims):
+            self.write_val(tensor.shape[n_dims - 1 - i], GGUFValueType.INT32)
+
+        assert tensor.dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
+        dtype = GGMLQuantizationType.F32 if tensor.dtype == np.float32 else GGMLQuantizationType.F16
+        self.write_val(dtype, GGUFValueType.INT32)
+        self.fout.write(struct.pack("<Q", self.offset_tensor))
+        self.offset_tensor += GGUFWriter.ggml_pad(tensor.nbytes, constants.GGUF_DEFAULT_ALIGNMENT)
+
+        offset_data = GGUFWriter.ggml_pad(self.fout.tell(), constants.GGUF_DEFAULT_ALIGNMENT)
+        pad = offset_data - self.fout.tell()
+        self.fout.write(bytes([0] * pad))
+
+        self.tensors.append(tensor)
+
+    def write_tensors(self):
+        for tensor in self.tensors:
+            tensor.tofile(self.fout)
+            pad = GGUFWriter.ggml_pad(tensor.nbytes, constants.GGUF_DEFAULT_ALIGNMENT) - tensor.nbytes
+            self.fout.write(bytes([0] * pad))
+
     def flush(self):
-        self.buffered_writer.flush()
+        self.fout.flush()
 
     def close(self):
-        self.buffered_writer.close()
+        self.fout.close()
 
     def write_architecture(self, architecture: str):
         self.write_string(constants.KEY_GENERAL_ARCHITECTURE,
@@ -235,14 +269,15 @@ def write_rope_scale(self, llm: str, value: float):
 if __name__ == "__main__":
     # Example usage with a file
     gguf_writer = GGUFWriter.open("example.gguf")
-    gguf_writer.write_header(0, 3)
-
-    gguf_writer.write_architecture("llama")
-    gguf_writer.write_uint32("answer", 42)  # Write a 32-bit integer
-    gguf_writer.write_float32("answer_in_float", 42.0)  # Write a 32-bit float
-    # Write an array of integers
-    #gguf_writer.write_array("simple_array", [1, 2, 3, 4])
-    # Write a nested array
-    #gguf_writer.write_array("nested", [1, "nested", [2, 3]])
+    gguf_writer.write_header(2, 3)
+
+    gguf_writer.write_architecture("llama")
+    gguf_writer.write_uint32("answer", 42)  # Write a 32-bit integer
+    gguf_writer.write_float32("answer_in_float", 42.0)  # Write a 32-bit float
+    tensor1 = np.random.random(size=(7, 10)).astype(np.float32)
+    tensor2 = np.random.random(size=(16, 12)).astype(np.float16)
+    gguf_writer.write_tensor_info("tensor1", tensor1)
+    gguf_writer.write_tensor_info("tensor2", tensor2)
+    gguf_writer.write_tensors()
 
     gguf_writer.close()
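
A minimal read-back sketch (not part of the commit) to sanity-check the file the __main__ block produces; it stops after the fixed-size header, since the metadata KV records and tensor infos would need a full reader to walk:

    import struct
    import constants

    # Assumes example.gguf was just written by the example above.
    with open("example.gguf", "rb") as f:
        magic, version, n_tensors, n_kv = struct.unpack("<IIII", f.read(16))

    assert magic == constants.GGUF_MAGIC
    assert version == constants.GGUF_VERSION
    print(n_tensors, n_kv)  # 2 3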
