    gguf.GGMLQuantizationType.Q4_K : (256, 2 + 2 + QK_K // 2 + 12),
    gguf.GGMLQuantizationType.Q5_K : (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
    gguf.GGMLQuantizationType.Q6_K : (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
-     gguf.GGMLQuantizationType.Q8_K : (256, 2 + QK_K + QK_K // 8),
+     gguf.GGMLQuantizationType.Q8_K : (256, 4 + QK_K + QK_K // 8),
}
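The Q8_K row above is the quantization-table fix: the per-block size goes from 2 + QK_K + QK_K // 8 to 4 + QK_K + QK_K // 8 bytes, since a Q8_K block stores its scale as a 4-byte float rather than the 2-byte half the old value assumed. As a minimal sketch (not part of the patch; the function name and the example numbers are illustrative), this is how a (block size, type size) entry turns an element count into a byte count:

# Minimal sketch, not part of the patch: how a (block_size, type_size) entry
# from the table above converts an element count into a byte count.
def quantized_nbytes(n_elements: int, block_size: int, type_size: int) -> int:
    # Quantized GGML tensors are stored as whole blocks, so the element count
    # must be an exact multiple of the block size.
    assert n_elements % block_size == 0
    return (n_elements // block_size) * type_size

# Q8_K example: 4-byte scale + QK_K quants + QK_K // 8 bytes of block sums
# = 4 + 256 + 32 = 292 bytes per 256-element block.
assert quantized_nbytes(4096 * 256, 256, 4 + 256 + 256 // 8) == 4096 * 292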

class Hyperparameters:
    def __init__(self):
        self.n_vocab = self.n_embd = self.n_mult = self.n_head = self.n_layer = self.n_rot = self.ftype = 0
+         self.n_ff = 0
+
+     def set_n_ff(self, model):
+         ff_tensor_idx = model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
+         assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
+         ff_tensor = model.tensors[ff_tensor_idx]
+         self.n_ff = ff_tensor.dims[1]

    def load(self, data, offset):
        (
@@ -42,7 +49,7 @@ def load(self, data, offset):
        return 4 * 7

    def __str__(self):
-         return f'<Hyperparameters: n_vocab={self.n_vocab}, n_embd={self.n_embd}, n_mult={self.n_mult}, n_head={self.n_head}, n_layer={self.n_layer}, n_rot={self.n_rot}, ftype={self.ftype}>'
+         return f'<Hyperparameters: n_vocab={self.n_vocab}, n_embd={self.n_embd}, n_mult={self.n_mult}, n_head={self.n_head}, n_layer={self.n_layer}, n_rot={self.n_rot}, n_ff={self.n_ff}, ftype={self.ftype}>'

class Vocab:
    def __init__(self):
@@ -122,6 +129,7 @@ def load(self, data, offset):
        self.vocab = vocab
        self.tensors = tensors
        self.tensor_map = tensor_map
+         hp.set_n_ff(self)
        return offset

class GGMLToGGUF:
@@ -132,10 +140,6 @@ def __init__(self, ggml_model, data, cfg, params_override = None, vocab_override
        self.cfg = cfg
        self.params_override = params_override
        self.vocab_override = vocab_override
-         ff_tensor_idx = ggml_model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
-         assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
-         ff_tensor = ggml_model.tensors[ff_tensor_idx]
-         self.ff_length = ff_tensor.dims[1]
        if params_override is not None:
            n_kv_head = params_override.n_head_kv
        else:
@@ -196,7 +200,7 @@ def add_params(self, gguf_writer):
        gguf_writer.add_context_length(cfg.context_length)
        gguf_writer.add_embedding_length(hp.n_embd)
        gguf_writer.add_block_count(hp.n_layer)
-         gguf_writer.add_feed_forward_length(self.ff_length)
+         gguf_writer.add_feed_forward_length(hp.n_ff)
        gguf_writer.add_rope_dimension_count(hp.n_embd // hp.n_head)
        gguf_writer.add_head_count(hp.n_head)
        gguf_writer.add_head_count_kv(self.n_kv_head)
@@ -267,18 +271,24 @@ def add_tensors(self, gguf_writer):
            # print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
            gguf_writer.add_tensor(mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], raw_shape = tempdims, raw_dtype = tensor.dtype)

- def handle_metadata(cfg):
+ def handle_metadata(cfg, hp):
    import convert
    assert cfg.model_metadata_dir.is_dir(), 'Metadata dir is not a directory'
    hf_config_path = cfg.model_metadata_dir / "config.json"
    orig_config_path = cfg.model_metadata_dir / "params.json"
-     # Passing None to these load functions is not kosher but it should
-     # currently work for HF and only fail for original mode if
-     # n_vocab or n_ff is missing in params.json
+     # We pass a fake model here. "original" mode will check the shapes of some
+     # tensors if information is missing in the .json file: other than that, the
+     # model data isn't used so this should be safe (at least for now).
+     fakemodel = {
+         'tok_embeddings.weight': convert.LazyTensor.__new__(convert.LazyTensor),
+         'layers.0.feed_forward.w1.weight': convert.LazyTensor.__new__(convert.LazyTensor),
+     }
+     fakemodel['tok_embeddings.weight'].shape = [hp.n_vocab]
+     fakemodel['layers.0.feed_forward.w1.weight'].shape = [hp.n_ff]
    if hf_config_path.exists():
-         params = convert.Params.loadHFTransformerJson(None, hf_config_path)
+         params = convert.Params.loadHFTransformerJson(fakemodel, hf_config_path)
    elif orig_config_path.exists():
-         params = convert.Params.loadOriginalParamsJson(None, orig_config_path)
+         params = convert.Params.loadOriginalParamsJson(fakemodel, orig_config_path)
    else:
        raise ValueError('Unable to load metadata')
    vocab = convert.load_vocab(cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir, cfg.vocabtype)
@@ -303,20 +313,20 @@ def main():
    cfg = handle_args()
    print(f'* Using config: {cfg}')
    print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n')
+     data = np.memmap(cfg.input, mode = 'r')
+     model = GGMLV3Model()
+     print('* Scanning GGML input file')
+     offset = model.load(data, 0)
+     print(f'* GGML model hyperparameters: {model.hyperparameters}')
    vocab_override = None
    params_override = None
    if cfg.model_metadata_dir is not None:
-         (params_override, vocab_override) = handle_metadata(cfg)
+         (params_override, vocab_override) = handle_metadata(cfg, model.hyperparameters)
        print('!! Note: When overriding params the --gqa, --eps and --context-length options are ignored.')
        print(f'* Overriding params: {params_override}')
        print(f'* Overriding vocab: {vocab_override}')
    else:
        print('\n=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n')
-     data = np.memmap(cfg.input, mode = 'r')
-     model = GGMLV3Model()
-     print('* Scanning GGML input file')
-     offset = model.load(data, 0)
-     print(f'* GGML model hyperparameters: {model.hyperparameters}')
    converter = GGMLToGGUF(model, data, cfg, params_override = params_override, vocab_override = vocab_override)
    converter.save()
    print(f'* Successful completion. Output saved to: {cfg.output}')
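The main() reordering above exists because handle_metadata() now needs the scanned hyperparameters (n_vocab and the new n_ff) to build its fake model, so the GGML file has to be memory-mapped and scanned before any metadata override is loaded. A condensed sketch of the resulting flow, using only names that appear in this diff and omitting argument handling and the warning prints:

# Condensed sketch of the new ordering in main(): scan the GGML input first so
# that model.hyperparameters (including n_ff, filled in via set_n_ff) exists
# before handle_metadata() builds its fake model from those values.
data = np.memmap(cfg.input, mode = 'r')
model = GGMLV3Model()
model.load(data, 0)

params_override = vocab_override = None
if cfg.model_metadata_dir is not None:
    params_override, vocab_override = handle_metadata(cfg, model.hyperparameters)

converter = GGMLToGGUF(model, data, cfg,
                       params_override = params_override,
                       vocab_override = vocab_override)
converter.save()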