Add script to convert old ggml files to newer version #539
convert-unversioned-ggml-to-ggml.py (new file)
@@ -0,0 +1,100 @@
#!/usr/bin/env python3
# Original by https://github.com/eiz
# https://github.com/ggerganov/llama.cpp/issues/324#issuecomment-1476227818
import argparse
import glob
import os
import struct
import sys
from sentencepiece import SentencePieceProcessor

HPARAMS = ["vocab_size", "dim", "multiple_of", "n_heads", "n_layers"]

def parse_args():
    parser = argparse.ArgumentParser(description='Upgrade old ggml model files to the current format')
    parser.add_argument('dir_model', help='directory containing ggml .bin files')
    parser.add_argument('tokenizer_model', help='path to LLaMA tokenizer.model file')
    return parser.parse_args()

def read_header(f_in):
    # Old header layout: magic, the five hparams, rot, ftype -- all int32.
    struct_fmt = "i" * (3 + len(HPARAMS))
    struct_size = struct.calcsize(struct_fmt)
    buf = f_in.read(struct_size)
    return struct.unpack(struct_fmt, buf)

def write_header(f_out, header):
    (magic, vocab_size, dim, multiple_of, n_heads, n_layers, rot, ftype) = header

    if magic != 0x67676d6c:
        raise Exception('Invalid file magic. Must be an old style ggml file.')

    # New header is the old one with a file version field inserted after the magic.
    values = [
        0x67676d66,  # magic: ggmf in hex
        1,           # file version
        vocab_size,
        dim,
        multiple_of,
        n_heads,
        n_layers,
        rot,
        ftype
    ]
    f_out.write(struct.pack("i" * len(values), *values))

def write_tokens(f_out, tokenizer):
    for i in range(tokenizer.vocab_size()):
        if tokenizer.is_unknown(i):
            text = " \u2047 ".encode("utf-8")
        elif tokenizer.is_control(i):
            text = b""
        elif tokenizer.is_byte(i):
            # Byte tokens look like "<0xAB>"; extract the raw byte value.
            piece = tokenizer.id_to_piece(i)
            if len(piece) != 6:
                print(f"Invalid token: {piece}")
                sys.exit(1)
            byte_value = int(piece[3:-1], 16)
            text = struct.pack("B", byte_value)
        else:
            text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8")
        f_out.write(struct.pack("i", len(text)))
        f_out.write(text)
        f_out.write(struct.pack("f", tokenizer.get_score(i)))

def read_tokens(f_in, tokenizer):
    # Skip over the old token table; it is rewritten from tokenizer.model.
    for i in range(tokenizer.vocab_size()):
        len_b = f_in.read(4)
        (length,) = struct.unpack("i", len_b)
        f_in.read(length)

def copy_all_data(f_out, f_in):
    # Copy the remaining tensor data through unchanged, 1 MiB at a time.
    while True:
        buf = f_in.read(1024 * 1024)
        if not buf:
            break
        f_out.write(buf)

def convert_one_file(path_in, tokenizer):
    path_tmp = f"{path_in}.tmp"
    path_orig = f"{path_in}.orig"
    print(f"converting {path_in}")
    with open(path_in, "rb") as f_in, open(path_tmp, "wb") as f_out:
        write_header(f_out, read_header(f_in))
        read_tokens(f_in, tokenizer)
        write_tokens(f_out, tokenizer)
        copy_all_data(f_out, f_in)
    # Keep the original as a .orig backup and move the converted file into place.
    os.rename(path_in, path_orig)
    os.rename(path_tmp, path_in)

def main():
    args = parse_args()
    files = []
    files.extend(glob.glob(f"{args.dir_model}/*.bin"))
    files.extend(glob.glob(f"{args.dir_model}/*.bin.*"))

    tokenizer = SentencePieceProcessor(args.tokenizer_model)

    for file in files:
        convert_one_file(file, tokenizer)

if __name__ == "__main__":
    main()
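For reference, a usage sketch: assuming the script is saved as convert-unversioned-ggml-to-ggml.py (the name the updated error message below points to) and the old .bin files live under models/7B (a hypothetical path), it would be run as:

python3 convert-unversioned-ggml-to-ggml.py models/7B models/tokenizer.model

Each file is converted in place; the unversioned original is kept alongside as a .orig backup, so the conversion can be undone by renaming it back.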
llama.cpp
@@ -320,7 +320,7 @@ static bool llama_model_load(
     uint32_t magic;
     fin.read((char *) &magic, sizeof(magic));
     if (magic == LLAMA_FILE_MAGIC_UNVERSIONED) {
-        fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n",
+        fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files or convert them with convert-unversioned-ggml-to-ggml.py!)\n",
                 __func__, fname.c_str());
         return false;
     }

Comment: This shouldn't be changed, as the conversion script is unsupported.

Reply: I think it's user-friendly if it gives the user a hint on how to resolve the situation with an incompatible model.
Comment: Wait, I'm confused. Isn't it supposed to be an old-style ggml file for this?

Reply: It expects the unversioned GGML model and produces the versioned one.
#define LLAMA_FILE_MAGIC_UNVERSIONED 0x67676d6c // pre-versioned files
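To make the constants concrete, here is a minimal sketch (not part of this PR) that reads the leading magic to tell which format a file is in; both values come from the definitions above, and the model path is hypothetical:

import struct

def ggml_file_magic(path):
    # The header starts with a 4-byte magic number (same "i" layout the script uses).
    with open(path, "rb") as f:
        (magic,) = struct.unpack("i", f.read(4))
    return magic

magic = ggml_file_magic("models/7B/ggml-model-q4_0.bin")
if magic == 0x67676d6c:    # "ggml": old unversioned file, needs conversion
    print("run convert-unversioned-ggml-to-ggml.py on this file")
elif magic == 0x67676d66:  # "ggmf": versioned file, already current
    print("already in the current format")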