23
23
from concurrent .futures import ProcessPoolExecutor , ThreadPoolExecutor
24
24
from dataclasses import dataclass
25
25
from pathlib import Path
26
- from typing import TYPE_CHECKING , Any , Callable , ClassVar , IO , Iterable , Literal , Protocol , TypeVar , runtime_checkable
26
+ from typing import TYPE_CHECKING , Any , Callable , ClassVar , IO , Iterable , Literal , Protocol , TypeVar , runtime_checkable , Optional
27
27
28
28
import numpy as np
29
29
from sentencepiece import SentencePieceProcessor
@@ -337,10 +337,46 @@ def load(model_plus: ModelPlus) -> Params:
337
337
return params
338
338
339
339
340
@dataclass
class Metadata:
    """Optional authorship/provenance metadata supplied by the user as a JSON file.

    Every field defaults to None, meaning "not provided"; consumers (e.g. the
    GGUF metadata writer) skip fields that are None.
    """
    name: Optional[str] = None
    author: Optional[str] = None
    version: Optional[str] = None
    url: Optional[str] = None
    description: Optional[str] = None
    licence: Optional[str] = None
    source_url: Optional[str] = None
    source_hf_repo: Optional[str] = None

    @staticmethod
    def load(metadata_path: Path) -> "Metadata":
        """Load metadata from *metadata_path* (a JSON file).

        Returns an empty Metadata instance when the path is None or does not
        exist, so callers never have to special-case a missing file.
        """
        if metadata_path is None or not metadata_path.exists():
            return Metadata()

        with open(metadata_path, 'r') as file:
            data = json.load(file)

        # Create a new Metadata instance
        metadata = Metadata()

        # Keys in the JSON file follow the GGUF "general.*" naming convention.
        metadata.name = data.get("general.name")
        metadata.author = data.get("general.author")
        metadata.version = data.get("general.version")
        metadata.url = data.get("general.url")
        metadata.description = data.get("general.description")
        # BUG FIX: previously assigned to `metadata.license` (a non-field
        # attribute), leaving the declared field `licence` — the one readers
        # such as add_meta_model consult — always None.
        metadata.licence = data.get("general.license")
        metadata.source_url = data.get("general.source_url")
        metadata.source_hf_repo = data.get("general.source_hf_repo")

        return metadata
373
+
374
+
340
375
#
341
376
# vocab
342
377
#
343
378
379
+
344
380
@runtime_checkable
345
381
class BaseVocab (Protocol ):
346
382
tokenizer_model : ClassVar [str ]
@@ -1053,21 +1089,41 @@ class OutputFile:
1053
1089
def __init__ (self , fname_out : Path , endianess :gguf .GGUFEndian = gguf .GGUFEndian .LITTLE ):
1054
1090
self .gguf = gguf .GGUFWriter (fname_out , gguf .MODEL_ARCH_NAMES [ARCH ], endianess = endianess )
1055
1091
1056
- def add_meta_arch (self , params : Params ) -> None :
1092
    def add_meta_model(self, params: Params, metadata: Metadata) -> None:
        # Metadata about the model and its provenance.
        # Name resolution priority: explicit metadata name, then the model's
        # parent directory name, then the n_ctx==4096 heuristic, and finally
        # the generic "LLaMA" fallback.
        name = "LLaMA"
        if metadata is not None and metadata.name is not None:
            name = metadata.name
        elif params.path_model is not None:
            name = str(params.path_model.parent).split("/")[-1]
        elif params.n_ctx == 4096:
            # Heuristic detection of LLaMA v2 model
            name = "LLaMA v2"

        self.gguf.add_name(name)

        # Provenance fields are optional: each is written only when present
        # in the user-supplied metadata.
        if metadata is not None:
            if metadata.author is not None:
                self.gguf.add_author(metadata.author)
            if metadata.version is not None:
                self.gguf.add_version(metadata.version)
            if metadata.url is not None:
                self.gguf.add_url(metadata.url)
            if metadata.description is not None:
                self.gguf.add_description(metadata.description)
            if metadata.licence is not None:
                self.gguf.add_licence(metadata.licence)
            if metadata.source_url is not None:
                self.gguf.add_source_url(metadata.source_url)
            if metadata.source_hf_repo is not None:
                self.gguf.add_source_hf_repo(metadata.source_hf_repo)
1120
+
1121
+ def add_meta_arch (self , params : Params ) -> None :
1122
+ # Metadata About The Neural Architecture Itself
1123
+ self .gguf .add_context_length (params .n_ctx )
1124
+ self .gguf .add_embedding_length (params .n_embd )
1125
+ self .gguf .add_block_count (params .n_layer )
1126
+ self .gguf .add_feed_forward_length (params .n_ff )
1071
1127
self .gguf .add_rope_dimension_count (params .n_embd // params .n_head )
1072
1128
self .gguf .add_head_count (params .n_head )
1073
1129
self .gguf .add_head_count_kv (params .n_head_kv )
@@ -1170,13 +1226,14 @@ def close(self) -> None:
1170
1226
@staticmethod
1171
1227
def write_vocab_only (
1172
1228
fname_out : Path , params : Params , vocab : Vocab , svocab : gguf .SpecialVocab ,
1173
- endianess : gguf .GGUFEndian = gguf .GGUFEndian .LITTLE , pad_vocab : bool = False ,
1229
+ endianess : gguf .GGUFEndian = gguf .GGUFEndian .LITTLE , pad_vocab : bool = False , metadata : Metadata = None ,
1174
1230
) -> None :
1175
1231
check_vocab_size (params , vocab , pad_vocab = pad_vocab )
1176
1232
1177
1233
of = OutputFile (fname_out , endianess = endianess )
1178
1234
1179
1235
# meta data
1236
+ of .add_meta_model (params , metadata )
1180
1237
of .add_meta_arch (params )
1181
1238
of .add_meta_vocab (vocab )
1182
1239
of .add_meta_special_vocab (svocab )
@@ -1203,12 +1260,14 @@ def write_all(
1203
1260
fname_out : Path , ftype : GGMLFileType , params : Params , model : LazyModel , vocab : BaseVocab , svocab : gguf .SpecialVocab ,
1204
1261
concurrency : int = DEFAULT_CONCURRENCY , endianess : gguf .GGUFEndian = gguf .GGUFEndian .LITTLE ,
1205
1262
pad_vocab : bool = False ,
1263
+ metadata : Metadata = None ,
1206
1264
) -> None :
1207
1265
check_vocab_size (params , vocab , pad_vocab = pad_vocab )
1208
1266
1209
1267
of = OutputFile (fname_out , endianess = endianess )
1210
1268
1211
1269
# meta data
1270
+ of .add_meta_model (params , metadata )
1212
1271
of .add_meta_arch (params )
1213
1272
if isinstance (vocab , Vocab ):
1214
1273
of .add_meta_vocab (vocab )
@@ -1244,6 +1303,37 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
1244
1303
raise ValueError (f"Unexpected combination of types: { name_to_type } " )
1245
1304
1246
1305
1306
def model_parameter_count(model: LazyModel) -> int:
    """Return the total number of scalar weights across all tensors in *model*.

    Each tensor contributes the product of its dimensions; a zero-dimensional
    tensor counts as one parameter (empty product == 1).
    """
    total_model_parameters = 0
    # Only the tensor shapes are needed, so iterate values() directly rather
    # than enumerate(model.items()) — both the index and the name were unused.
    for lazy_tensor in model.values():
        weights_in_tensor = 1
        for dim in lazy_tensor.shape:
            weights_in_tensor *= dim
        total_model_parameters += weights_in_tensor
    return total_model_parameters
1314
+
1315
+
1316
def model_parameter_count_rounded_notation(model_params_count: int) -> str:
    """Render a raw parameter count as a short human-readable figure.

    e.g. 7241732096 -> "7B". Scales are T (trillions), B (billions),
    M (millions), with K (thousands) as the fallback.
    """
    # (threshold, multiplier, suffix) from largest scale down; the multiplier
    # mirrors the original float arithmetic (count * 1e-N) exactly.
    scales = (
        (1e12, 1e-12, "T"),
        (1e9, 1e-9, "B"),
        (1e6, 1e-6, "M"),
    )
    for threshold, multiplier, suffix in scales:
        if model_params_count > threshold:
            return f"{round(model_params_count * multiplier)}{suffix}"
    # Thousands of parameters (or fewer).
    return f"{round(model_params_count * 1e-3)}K"
1335
+
1336
+
1247
1337
def convert_to_output_type (model : LazyModel , output_type : GGMLFileType ) -> LazyModel :
1248
1338
return {name : tensor .astype (output_type .type_for_tensor (name , tensor ))
1249
1339
for (name , tensor ) in model .items ()}
@@ -1423,13 +1513,30 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->
1423
1513
return vocab , special_vocab
1424
1514
1425
1515
1426
- def default_outfile (model_paths : list [Path ], file_type : GGMLFileType ) -> Path :
1427
- namestr = {
1428
- GGMLFileType .AllF32 : "f32 " ,
1429
- GGMLFileType .MostlyF16 : "f16 " ,
1430
- GGMLFileType .MostlyQ8_0 :"q8_0 " ,
1516
+ def default_outfile (model_paths : list [Path ], file_type : GGMLFileType , params : Params , model_params_count : int , metadata : Metadata ) -> Path :
1517
+ quantization = {
1518
+ GGMLFileType .AllF32 : "F32 " ,
1519
+ GGMLFileType .MostlyF16 : "F16 " ,
1520
+ GGMLFileType .MostlyQ8_0 : "Q8_0 " ,
1431
1521
}[file_type ]
1432
- ret = model_paths [0 ].parent / f"ggml-model-{ namestr } .gguf"
1522
+
1523
+ parameters = model_parameter_count_rounded_notation (model_params_count )
1524
+
1525
+ expert_count = ""
1526
+ if params .n_experts is not None :
1527
+ expert_count = f"{ params .n_experts } x"
1528
+
1529
+ version = ""
1530
+ if metadata is not None and metadata .version is not None :
1531
+ version = f"-{ metadata .version } "
1532
+
1533
+ name = "ggml-model"
1534
+ if metadata is not None and metadata .name is not None :
1535
+ name = metadata .name
1536
+ elif params .path_model is not None :
1537
+ name = params .path_model .name
1538
+
1539
+ ret = model_paths [0 ].parent / f"{ name } { version } -{ expert_count } { parameters } -{ quantization } .gguf"
1433
1540
if ret in model_paths :
1434
1541
sys .stderr .write (
1435
1542
f"Error: Default output path ({ ret } ) would overwrite the input. "
@@ -1466,8 +1573,12 @@ def main(args_in: list[str] | None = None) -> None:
1466
1573
parser .add_argument ("--big-endian" , action = "store_true" , help = "model is executed on big endian machine" )
1467
1574
parser .add_argument ("--pad-vocab" , action = "store_true" , help = "add pad tokens when model vocab expects more than tokenizer metadata provides" )
1468
1575
parser .add_argument ("--skip-unknown" , action = "store_true" , help = "skip unknown tensor names instead of failing" )
1576
+ parser .add_argument ("--metadata" , type = Path , help = "Specify the path for a metadata file" )
1469
1577
1470
1578
args = parser .parse_args (args_in )
1579
+
1580
+ metadata = Metadata .load (args .metadata )
1581
+
1471
1582
if args .no_vocab and args .vocab_only :
1472
1583
raise ValueError ("--vocab-only does not make sense with --no-vocab" )
1473
1584
@@ -1481,6 +1592,9 @@ def main(args_in: list[str] | None = None) -> None:
1481
1592
else :
1482
1593
model_plus = ModelPlus (model = {}, paths = [args .model / 'dummy' ], format = 'none' , vocab = None )
1483
1594
1595
+ model_params_count = model_parameter_count (model_plus .model )
1596
+ print (f"model parameters count : { model_params_count } ({ model_parameter_count_rounded_notation (model_params_count )} )" )
1597
+
1484
1598
if args .dump :
1485
1599
do_dump_model (model_plus )
1486
1600
return
@@ -1520,27 +1634,30 @@ def main(args_in: list[str] | None = None) -> None:
1520
1634
raise ValueError ("need --outfile if using --vocab-only" )
1521
1635
outfile = args .outfile
1522
1636
OutputFile .write_vocab_only (outfile , params , vocab , special_vocab ,
1523
- endianess = endianess , pad_vocab = args .pad_vocab )
1637
+ endianess = endianess , pad_vocab = args .pad_vocab , metadata = metadata )
1524
1638
print (f"Wrote { outfile } " )
1525
1639
return
1526
1640
1527
1641
if model_plus .vocab is not None and args .vocab_dir is None and not args .no_vocab :
1528
1642
vocab = model_plus .vocab
1529
1643
1530
1644
print (f"Vocab info: { vocab } " )
1531
- print (f"Special vocab info: { special_vocab } " )
1645
+ special_vocab = gguf .SpecialVocab (model_plus .paths [0 ].parent ,
1646
+ load_merges = True ,
1647
+ n_vocab = vocab .vocab_size )
1532
1648
1649
+ print (f"Special vocab info: { special_vocab } " )
1533
1650
model = model_plus .model
1534
1651
model = convert_model_names (model , params , args .skip_unknown )
1535
1652
ftype = pick_output_type (model , args .outtype )
1536
1653
model = convert_to_output_type (model , ftype )
1537
- outfile = args .outfile or default_outfile (model_plus .paths , ftype )
1654
+ outfile = args .outfile or default_outfile (model_plus .paths , ftype , params , model_params_count , metadata )
1538
1655
1539
1656
params .ftype = ftype
1540
1657
print (f"Writing { outfile } , format { ftype } " )
1541
1658
1542
1659
OutputFile .write_all (outfile , ftype , params , model , vocab , special_vocab ,
1543
- concurrency = args .concurrency , endianess = endianess , pad_vocab = args .pad_vocab )
1660
+ concurrency = args .concurrency , endianess = endianess , pad_vocab = args .pad_vocab , metadata = metadata )
1544
1661
print (f"Wrote { outfile } " )
1545
1662
1546
1663
0 commit comments