Commit 840e7bf

convert.py: Outfile default name change and additional metadata support
1 parent a8bd14d


convert.py

Lines changed: 142 additions & 25 deletions
@@ -23,7 +23,7 @@
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
 from dataclasses import dataclass
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, ClassVar, IO, Iterable, Literal, Protocol, TypeVar, runtime_checkable
+from typing import TYPE_CHECKING, Any, Callable, ClassVar, IO, Iterable, Literal, Protocol, TypeVar, runtime_checkable, Optional

 import numpy as np
 from sentencepiece import SentencePieceProcessor
@@ -337,10 +337,46 @@ def load(model_plus: ModelPlus) -> Params:
         return params


+@dataclass
+class Metadata:
+    name: Optional[str] = None
+    author: Optional[str] = None
+    version: Optional[str] = None
+    url: Optional[str] = None
+    description: Optional[str] = None
+    licence: Optional[str] = None
+    source_url: Optional[str] = None
+    source_hf_repo: Optional[str] = None
+
+    @staticmethod
+    def load(metadata_path: Path) -> "Metadata":
+        if metadata_path is None or not metadata_path.exists():
+            return Metadata()
+
+        with open(metadata_path, 'r') as file:
+            data = json.load(file)
+
+        # Create a new Metadata instance
+        metadata = Metadata()
+
+        # Assign values to Metadata attributes if they exist in the JSON file
+        metadata.name = data.get("general.name")
+        metadata.author = data.get("general.author")
+        metadata.version = data.get("general.version")
+        metadata.url = data.get("general.url")
+        metadata.description = data.get("general.description")
+        metadata.licence = data.get("general.license")
+        metadata.source_url = data.get("general.source_url")
+        metadata.source_hf_repo = data.get("general.source_hf_repo")
+
+        return metadata
+
+
 #
 # vocab
 #

+
 @runtime_checkable
 class BaseVocab(Protocol):
     tokenizer_model: ClassVar[str]
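
For reference, Metadata.load expects a flat JSON object keyed by the same general.* names the loader reads via data.get. A minimal example metadata file (every value below is a placeholder):

    {
        "general.name": "TinyLlama",
        "general.author": "Example Author",
        "general.version": "v1.0",
        "general.url": "https://example.com/tinyllama",
        "general.description": "An example fine-tune.",
        "general.license": "apache-2.0",
        "general.source_url": "https://example.com/tinyllama/source",
        "general.source_hf_repo": "example/tinyllama"
    }

Keys that are absent simply leave the corresponding Metadata field as None, and keys outside this set are ignored.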
@@ -1053,21 +1089,41 @@ class OutputFile:
     def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE):
         self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)

-    def add_meta_arch(self, params: Params) -> None:
+    def add_meta_model(self, params: Params, metadata: Metadata) -> None:
+        # Metadata About The Model And Its Provenance
         name = "LLaMA"
-
-        # TODO: better logic to determine model name
-        if params.n_ctx == 4096:
-            name = "LLaMA v2"
+        if metadata is not None and metadata.name is not None:
+            name = metadata.name
         elif params.path_model is not None:
-            name = str(params.path_model.parent).split('/')[-1]
-
-        self.gguf.add_name                (name)
-        self.gguf.add_vocab_size          (params.n_vocab)
-        self.gguf.add_context_length      (params.n_ctx)
-        self.gguf.add_embedding_length    (params.n_embd)
-        self.gguf.add_block_count         (params.n_layer)
-        self.gguf.add_feed_forward_length (params.n_ff)
+            name = str(params.path_model.parent).split("/")[-1]
+        elif params.n_ctx == 4096:
+            # Heuristic detection of LLaMA v2 model
+            name = "LLaMA v2"
+
+        self.gguf.add_name(name)
+
+        if metadata is not None:
+            if metadata.author is not None:
+                self.gguf.add_author(metadata.author)
+            if metadata.version is not None:
+                self.gguf.add_version(metadata.version)
+            if metadata.url is not None:
+                self.gguf.add_url(metadata.url)
+            if metadata.description is not None:
+                self.gguf.add_description(metadata.description)
+            if metadata.licence is not None:
+                self.gguf.add_licence(metadata.licence)
+            if metadata.source_url is not None:
+                self.gguf.add_source_url(metadata.source_url)
+            if metadata.source_hf_repo is not None:
+                self.gguf.add_source_hf_repo(metadata.source_hf_repo)
+
+    def add_meta_arch(self, params: Params) -> None:
+        # Metadata About The Neural Architecture Itself
+        self.gguf.add_context_length(params.n_ctx)
+        self.gguf.add_embedding_length(params.n_embd)
+        self.gguf.add_block_count(params.n_layer)
+        self.gguf.add_feed_forward_length(params.n_ff)
         self.gguf.add_rope_dimension_count(params.n_embd // params.n_head)
         self.gguf.add_head_count          (params.n_head)
         self.gguf.add_head_count_kv       (params.n_head_kv)
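
A minimal sketch of the resulting call sequence, assuming it runs inside convert.py's namespace and that params has already been obtained from Params.load (names and values are placeholders):

    metadata = Metadata()
    metadata.name = "TinyLlama"
    metadata.author = "Example Author"

    of = OutputFile(Path("TinyLlama-1B-F16.gguf"))
    of.add_meta_model(params, metadata)  # general.name, general.author, ...
    of.add_meta_arch(params)             # context length, embedding length, ...

Passing metadata=None is still valid: add_meta_model then falls back to the path-based or n_ctx-based name and skips all of the optional fields.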
@@ -1170,13 +1226,14 @@ def close(self) -> None:
     @staticmethod
     def write_vocab_only(
         fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
-        endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False,
+        endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, metadata: Metadata = None,
     ) -> None:
         check_vocab_size(params, vocab, pad_vocab=pad_vocab)

         of = OutputFile(fname_out, endianess=endianess)

         # meta data
+        of.add_meta_model(params, metadata)
         of.add_meta_arch(params)
         of.add_meta_vocab(vocab)
         of.add_meta_special_vocab(svocab)
@@ -1203,12 +1260,14 @@ def write_all(
         fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: BaseVocab, svocab: gguf.SpecialVocab,
         concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
         pad_vocab: bool = False,
+        metadata: Metadata = None,
     ) -> None:
         check_vocab_size(params, vocab, pad_vocab=pad_vocab)

         of = OutputFile(fname_out, endianess=endianess)

         # meta data
+        of.add_meta_model(params, metadata)
         of.add_meta_arch(params)
         if isinstance(vocab, Vocab):
             of.add_meta_vocab(vocab)
@@ -1244,6 +1303,37 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
     raise ValueError(f"Unexpected combination of types: {name_to_type}")


+def model_parameter_count(model: LazyModel) -> int:
+    total_model_parameters = 0
+    for lazy_tensor in model.values():
+        sum_weights_in_tensor = 1
+        for dim in lazy_tensor.shape:
+            sum_weights_in_tensor *= dim
+        total_model_parameters += sum_weights_in_tensor
+    return total_model_parameters
+
+
+def model_parameter_count_rounded_notation(model_params_count: int) -> str:
+    if model_params_count > 1e12:
+        # Trillions Of Parameters
+        scaled_model_params = model_params_count * 1e-12
+        scale_suffix = "T"
+    elif model_params_count > 1e9:
+        # Billions Of Parameters
+        scaled_model_params = model_params_count * 1e-9
+        scale_suffix = "B"
+    elif model_params_count > 1e6:
+        # Millions Of Parameters
+        scaled_model_params = model_params_count * 1e-6
+        scale_suffix = "M"
+    else:
+        # Thousands Of Parameters
+        scaled_model_params = model_params_count * 1e-3
+        scale_suffix = "K"
+
+    return f"{round(scaled_model_params)}{scale_suffix}"
+
+
 def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
     return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
             for (name, tensor) in model.items()}
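
As a worked check of the rounding helper: LLaMA v2 7B has 6,738,415,616 parameters, 6738415616 * 1e-9 ≈ 6.74, which rounds to 7 and yields "7B":

    assert model_parameter_count_rounded_notation(6_738_415_616) == "7B"
    assert model_parameter_count_rounded_notation(1_100_000)     == "1M"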
@@ -1423,13 +1513,30 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->
         return vocab, special_vocab


-def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
-    namestr = {
-        GGMLFileType.AllF32:    "f32",
-        GGMLFileType.MostlyF16: "f16",
-        GGMLFileType.MostlyQ8_0:"q8_0",
+def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path:
+    quantization = {
+        GGMLFileType.AllF32:    "F32",
+        GGMLFileType.MostlyF16: "F16",
+        GGMLFileType.MostlyQ8_0: "Q8_0",
     }[file_type]
-    ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
+
+    parameters = model_parameter_count_rounded_notation(model_params_count)
+
+    expert_count = ""
+    if params.n_experts is not None:
+        expert_count = f"{params.n_experts}x"
+
+    version = ""
+    if metadata is not None and metadata.version is not None:
+        version = f"-{metadata.version}"
+
+    name = "ggml-model"
+    if metadata is not None and metadata.name is not None:
+        name = metadata.name
+    elif params.path_model is not None:
+        name = params.path_model.name
+
+    ret = model_paths[0].parent / f"{name}{version}-{expert_count}{parameters}-{quantization}.gguf"
     if ret in model_paths:
         sys.stderr.write(
             f"Error: Default output path ({ret}) would overwrite the input. "
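
As an example of the new scheme: a model with metadata.name = "TinyLlama", metadata.version = "v1.0", no experts, 1,100,000,000 parameters, and F16 output now defaults to TinyLlama-v1.0-1B-F16.gguf, where the old code produced ggml-model-f16.gguf regardless of the model; a mixture-of-experts model with params.n_experts = 8 would get a parameter block like 8x47B instead.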
@@ -1466,8 +1573,12 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--big-endian", action="store_true", help="model is executed on big endian machine")
     parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
     parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
+    parser.add_argument("--metadata", type=Path, help="Specify the path for a metadata file")

     args = parser.parse_args(args_in)
+
+    metadata = Metadata.load(args.metadata)
+
     if args.no_vocab and args.vocab_only:
         raise ValueError("--vocab-only does not make sense with --no-vocab")

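
Putting it together, a hypothetical invocation (the model directory and metadata path are placeholders):

    python convert.py models/tinyllama/ --outtype f16 --metadata tinyllama.json

When --outfile is omitted, the name produced by default_outfile above is used.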
@@ -1481,6 +1592,9 @@ def main(args_in: list[str] | None = None) -> None:
     else:
         model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)

+    model_params_count = model_parameter_count(model_plus.model)
+    print(f"model parameters count : {model_params_count} ({model_parameter_count_rounded_notation(model_params_count)})")
+
     if args.dump:
         do_dump_model(model_plus)
         return
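
With the placeholder 1,100,000,000-parameter model from the earlier examples, this prints: model parameters count : 1100000000 (1B)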
@@ -1520,27 +1634,30 @@ def main(args_in: list[str] | None = None) -> None:
             raise ValueError("need --outfile if using --vocab-only")
         outfile = args.outfile
         OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
-                                    endianess=endianess, pad_vocab=args.pad_vocab)
+                                    endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
         print(f"Wrote {outfile}")
         return

     if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
         vocab = model_plus.vocab

     print(f"Vocab info: {vocab}")
-    print(f"Special vocab info: {special_vocab}")
+    special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
+                                      load_merges = True,
+                                      n_vocab = vocab.vocab_size)

+    print(f"Special vocab info: {special_vocab}")
     model   = model_plus.model
     model   = convert_model_names(model, params, args.skip_unknown)
     ftype   = pick_output_type(model, args.outtype)
     model   = convert_to_output_type(model, ftype)
-    outfile = args.outfile or default_outfile(model_plus.paths, ftype)
+    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count, metadata)

     params.ftype = ftype
     print(f"Writing {outfile}, format {ftype}")

     OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
-                         concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
+                         concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
     print(f"Wrote {outfile}")

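
To spot-check that the new key/value pairs actually land in the output file, gguf-py's GGUFReader can be pointed at it. A minimal sketch, assuming string payloads live in the last part of each reader field (the same convention gguf-py's dump script relies on):

    from gguf import GGUFReader

    reader = GGUFReader("TinyLlama-v1.0-1B-F16.gguf")  # placeholder path
    for key in ("general.name", "general.author", "general.license"):
        field = reader.fields.get(key)
        if field is not None:
            print(key, "=", bytes(field.parts[-1]).decode("utf-8"))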