|
1 | 1 | """TODOs
|
2 |
| -1. Implement writing tensor data with alignment. |
3 |
| -2. Implement writers for known architectures, LLaMA in particular. |
4 |
| -3. Add docstrings from the format specs. |
5 |
| -4. After development is done, Convert it to a proper pip-installable Python package, and possibly move it to its own repo under ggml-org. |
| 2 | +1. Implement writers for known architectures, LLaMA in particular. |
| 3 | +2. Add docstrings from the format specs. |
| 4 | +3. After development is done, convert it to a proper pip-installable Python package, and possibly move it to its own repo under ggml-org. |
6 | 5 | """
|
7 | 6 |
|
8 | 7 | import struct
|
9 | 8 | import constants
|
10 | 9 | from enum import IntEnum
|
11 |
| -from typing import List, Any |
| 10 | +from typing import Any, IO, List |
| 11 | + |
| 12 | +import numpy as np |
| 13 | + |
12 | 14 |
|
13 | 15 | class GGMLQuantizationType(IntEnum):
|
14 | 16 | F32 = 0
|
@@ -54,15 +56,18 @@ def get_type(val):
|
54 | 56 | else:
|
55 | 57 | return GGUFValueType.INT32
|
56 | 58 |
|
| 59 | + |
class GGUFWriter:
    def __init__(self, fout: IO):
        """Wrap an already-open binary stream for writing a GGUF file.

        Parameters:
            fout: writable binary stream the GGUF bytes are emitted to.
        """
        self.fout = fout
        # Tensors registered via write_tensor_info, emitted later by write_tensors.
        self.tensors: List[np.ndarray] = []
        # Running byte offset of the next tensor inside the data section
        # (each tensor's size is rounded up to the default alignment).
        self.offset_tensor = 0
60 | 65 |
|
61 | 66 | def write_header(self, tensor_count: int, metadata_kv_count: int):
|
62 |
| - self.buffered_writer.write(struct.pack("<I", constants.GGUF_MAGIC)) |
63 |
| - self.buffered_writer.write(struct.pack("<I", constants.GGUF_VERSION)) |
64 |
| - self.buffered_writer.write(struct.pack("<I", tensor_count)) |
65 |
| - self.buffered_writer.write(struct.pack("<I", metadata_kv_count)) |
| 67 | + self.fout.write(struct.pack("<I", constants.GGUF_MAGIC)) |
| 68 | + self.fout.write(struct.pack("<I", constants.GGUF_VERSION)) |
| 69 | + self.fout.write(struct.pack("<I", tensor_count)) |
| 70 | + self.fout.write(struct.pack("<I", metadata_kv_count)) |
66 | 71 |
|
67 | 72 | @classmethod
|
68 | 73 | def open(cls, path: str) -> "GGUFWriter":
|
@@ -119,40 +124,69 @@ def write_val(self: str, val: Any, vtype: GGUFValueType = None):
|
119 | 124 | if vtype is None:
|
120 | 125 | vtype = GGUFValueType.get_type(val)
|
121 | 126 |
|
122 |
| - self.buffered_writer.write(struct.pack("<I", vtype)) |
| 127 | + self.fout.write(struct.pack("<I", vtype)) |
123 | 128 |
|
124 | 129 | if vtype == GGUFValueType.UINT8:
|
125 |
| - self.buffered_writer.write(struct.pack("<B", val)) |
| 130 | + self.fout.write(struct.pack("<B", val)) |
126 | 131 | elif vtype == GGUFValueType.INT8:
|
127 |
| - self.buffered_writer.write(struct.pack("<b", val)) |
| 132 | + self.fout.write(struct.pack("<b", val)) |
128 | 133 | elif vtype == GGUFValueType.UINT16:
|
129 |
| - self.buffered_writer.write(struct.pack("<H", val)) |
| 134 | + self.fout.write(struct.pack("<H", val)) |
130 | 135 | elif vtype == GGUFValueType.INT16:
|
131 |
| - self.buffered_writer.write(struct.pack("<h", val)) |
| 136 | + self.fout.write(struct.pack("<h", val)) |
132 | 137 | elif vtype == GGUFValueType.UINT32:
|
133 |
| - self.buffered_writer.write(struct.pack("<I", val)) |
| 138 | + self.fout.write(struct.pack("<I", val)) |
134 | 139 | elif vtype == GGUFValueType.INT32:
|
135 |
| - self.buffered_writer.write(struct.pack("<i", val)) |
| 140 | + self.fout.write(struct.pack("<i", val)) |
136 | 141 | elif vtype == GGUFValueType.FLOAT32:
|
137 |
| - self.buffered_writer.write(struct.pack("<f", val)) |
| 142 | + self.fout.write(struct.pack("<f", val)) |
138 | 143 | elif vtype == GGUFValueType.BOOL:
|
139 |
| - self.buffered_writer.write(struct.pack("?", val)) |
| 144 | + self.fout.write(struct.pack("?", val)) |
140 | 145 | elif vtype == GGUFValueType.STRING:
|
141 | 146 | encoded_val = val.encode("utf8")
|
142 |
| - self.buffered_writer.write(struct.pack("<I", len(encoded_val))) |
143 |
| - self.buffered_writer.write(encoded_val) |
| 147 | + self.fout.write(struct.pack("<I", len(encoded_val))) |
| 148 | + self.fout.write(encoded_val) |
144 | 149 | elif vtype == GGUFValueType.ARRAY:
|
145 |
| - self.buffered_writer.write(struct.pack("<I", len(val))) |
| 150 | + self.fout.write(struct.pack("<I", len(val))) |
146 | 151 | for item in val:
|
147 | 152 | self.write_val(item)
|
148 | 153 | else:
|
149 | 154 | raise ValueError("Invalid GGUF metadata value type")
|
150 | 155 |
|
| 156 | + @staticmethod |
| 157 | + def ggml_pad(x: int, n: int) -> int: |
| 158 | + return ((x + n - 1) // n) * n |
| 159 | + |
| 160 | + def write_tensor_info(self, name: str, tensor: np.ndarray): |
| 161 | + self.write_val(name, GGUFValueType.STRING) |
| 162 | + n_dims = len(tensor.shape) |
| 163 | + self.write_val(n_dims, GGUFValueType.INT32) |
| 164 | + for i in range(n_dims): |
| 165 | + self.write_val(tensor.shape[n_dims - 1 - i], GGUFValueType.INT32) |
| 166 | + |
| 167 | + assert tensor.dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now" |
| 168 | + dtype = GGMLQuantizationType.F32 if tensor.dtype == np.float32 else GGMLQuantizationType.F16 |
| 169 | + self.write_val(dtype, GGUFValueType.INT32) |
| 170 | + self.fout.write(struct.pack("<Q", self.offset_tensor)) |
| 171 | + self.offset_tensor += GGUFWriter.ggml_pad(tensor.nbytes, constants.GGUF_DEFAULT_ALIGNMENT) |
| 172 | + |
| 173 | + offset_data = GGUFWriter.ggml_pad(self.fout.tell(), constants.GGUF_DEFAULT_ALIGNMENT) |
| 174 | + pad = offset_data - self.fout.tell() |
| 175 | + self.fout.write(bytes([0] * pad)) |
| 176 | + |
| 177 | + self.tensors.append(tensor) |
| 178 | + |
| 179 | + def write_tensors(self): |
| 180 | + for tensor in self.tensors: |
| 181 | + tensor.tofile(self.fout) |
| 182 | + pad = GGUFWriter.ggml_pad(tensor.nbytes, constants.GGUF_DEFAULT_ALIGNMENT) - tensor.nbytes |
| 183 | + self.fout.write(bytes([0] * pad)) |
| 184 | + |
151 | 185 | def flush(self):
|
152 |
| - self.buffered_writer.flush() |
| 186 | + self.fout.flush() |
153 | 187 |
|
154 | 188 | def close(self):
|
155 |
| - self.buffered_writer.close() |
| 189 | + self.fout.close() |
156 | 190 |
|
157 | 191 | def write_architecture(self, architecture: str):
|
158 | 192 | self.write_string(constants.KEY_GENERAL_ARCHITECTURE,
|
@@ -235,14 +269,15 @@ def write_rope_scale(self, llm: str, value: float):
|
if __name__ == "__main__":
    # Demo: write a small GGUF file with 2 tensors and 3 metadata KV pairs.
    gguf_writer = GGUFWriter.open("example.gguf")

    # Example payload: one F32 and one F16 tensor.
    tensor1 = np.random.random(size=(7, 10)).astype(np.float32)
    tensor2 = np.random.random(size=(16, 12)).astype(np.float16)

    gguf_writer.write_header(2, 3)
    gguf_writer.write_architecture("llama")
    gguf_writer.write_uint32("answer", 42)              # 32-bit integer KV
    gguf_writer.write_float32("answer_in_float", 42.0)  # 32-bit float KV
    gguf_writer.write_tensor_info("tensor1", tensor1)
    gguf_writer.write_tensor_info("tensor2", tensor2)
    gguf_writer.write_tensors()

    gguf_writer.close()
|
0 commit comments