
Commit 2e07b99

wip
1 parent 6c63550 commit 2e07b99

3 files changed: +59 additions, -76 deletions

ggml.h

Lines changed: 5 additions & 5 deletions

@@ -1744,12 +1744,12 @@ extern "C" {
     GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
     GGML_API void * gguf_get_data (struct gguf_context * ctx);
 
-    GGML_API int gguf_get_n_kv(struct gguf_context * ctx);
-    GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);
-    GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i);
+    GGML_API int gguf_get_n_kv(struct gguf_context * ctx);
+    GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);
+    GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i);
+
     GGML_API enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i);
-    GGML_API enum gguf_type gguf_get_arr_type (struct gguf_context * ctx, int i);
-    GGML_API void gguf_get_val (struct gguf_context * ctx, int i, void * val);
+    GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);
 
     GGML_API const char * gguf_get_arr_str(struct gguf_context * ctx, int key_id, int i);
     GGML_API float gguf_get_arr_f32(struct gguf_context * ctx, int key_id, int i);

gguf-llama.cpp

Lines changed: 53 additions & 71 deletions

@@ -573,20 +573,19 @@ struct gguf_file_loader {
 
     struct ggml_context * ctx_data = NULL;
 
-    gguf_file_loader(const char * fname, gguf_load_tensors_map & tensors_map)
-        : file(fname, "rb") {
+    gguf_file_loader(const char * fname, gguf_load_tensors_map & tensors_map) : file(fname, "rb") {
         fprintf(stderr, "llama.cpp: loading model from %s\n", fname);
 
-        struct gguf_init_params params = {
-            /*.no_alloc = */ true,
-            /*.ctx = */ &ctx_data,
-        };
+        struct gguf_init_params params = {
+            /*.no_alloc = */ true,
+            /*.ctx = */ &ctx_data,
+        };
 
-        gguf_ctx = gguf_init_from_file(fname, params);
-        file_version = (enum gguf_file_version) gguf_get_version(gguf_ctx);
+        gguf_ctx = gguf_init_from_file(fname, params);
+        file_version = (enum gguf_file_version) gguf_get_version(gguf_ctx);
 
-        read_hparams();
-        read_vocab();
+        read_hparams();
+        read_vocab();
         read_tensor_metadata(tensors_map);
     }
 

@@ -636,18 +635,18 @@ struct gguf_file_loader {
 
     void read_vocab() {
         vocab.id_to_token.resize(hparams.n_vocab);
-        int token_idx = gguf_find_key(gguf_ctx, "tokenizer.ggml.tokens");
+
+        const int token_idx = gguf_find_key(gguf_ctx, "tokenizer.ggml.tokens");
         if (token_idx == -1) {
             throw std::runtime_error("cannot find token list in GGUF file\n");
         }
 
-        int score_idx = gguf_find_key(gguf_ctx, "tokenizer.ggml.scores");
+        const int score_idx = gguf_find_key(gguf_ctx, "tokenizer.ggml.scores");
         if (score_idx == -1) {
             throw std::runtime_error("cannot find token scores list in GGUF file\n");
         }
 
         for (uint32_t i = 0; i < hparams.n_vocab; i++) {
-
             std::string word = gguf_get_arr_str(gguf_ctx, token_idx, i);
 
             vocab.token_to_id[word] = i;

@@ -786,7 +785,7 @@ struct gguf_file_saver {
         gguf_type arr_type;
         int n_arr;
 
-        switch(vtype) {
+        switch (vtype) {
             case GGUF_TYPE_BOOL:
                 bool_val = gguf_get_val_bool(fl->gguf_ctx, i);
                 file.write_val<bool>(key, GGUF_TYPE_BOOL, bool_val);

@@ -809,7 +808,7 @@
                 break;
             case GGUF_TYPE_STRING:
                 str_val = gguf_get_val_str(fl->gguf_ctx, i);
-                file.write_val<std::string>(key, GGUF_TYPE_STRING, str_val);
+                file.write_str(key, GGUF_TYPE_STRING, str_val);
                 break;
             case GGUF_TYPE_UINT16:
                 u16_val = gguf_get_val_u16(fl->gguf_ctx, i);

@@ -825,7 +824,7 @@
                 break;
             case GGUF_TYPE_ARRAY:
                 arr_type = gguf_get_arr_type(fl->gguf_ctx, i);
-                n_arr = gguf_get_arr_n(fl->gguf_ctx, i);
+                n_arr = gguf_get_arr_n (fl->gguf_ctx, i);
                 if (arr_type == GGUF_TYPE_FLOAT32) {
                     write_hparam_arr_f32(key, arr_type, i, n_arr);
                 } else if (arr_type == GGUF_TYPE_STRING) {
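
The string case in gguf_file_saver above now routes through file.write_str() instead of write_val<std::string>(): write_val() serializes a value with fwrite(&val, sizeof(val), ...), which for std::string would dump the object's internal representation (pointers and size fields) rather than its characters. A hypothetical length-prefixed writer, shown only to illustrate the difference (this is not the gguf-util.h implementation, and the prefix width here is arbitrary, not the GGUF on-disk layout):

```cpp
// Hypothetical illustration, not the actual gguf_file::write_str():
// strings must be written as a length prefix plus character data, because
// fwrite(&s, sizeof(s), 1, fp) would only dump std::string's object bytes.
#include <cstdint>
#include <cstdio>
#include <string>

static void write_length_prefixed(FILE * fp, const std::string & s) {
    const uint32_t n = (uint32_t) s.size();
    fwrite(&n, sizeof(n), 1, fp);   // length prefix (width chosen for the example)
    fwrite(s.data(), 1, n, fp);     // character payload, not the object bytes
}
```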
@@ -922,20 +921,6 @@ struct llama_model_loader {
         }
     }
 
-    struct ggml_tensor * get_tensor(const std::string & name, const std::vector<uint32_t> & ne, ggml_backend backend) {
-        auto it = tensors_map.name_to_idx.find(name);
-        if (it == tensors_map.name_to_idx.end()) {
-            throw std::runtime_error(std::runtime_error(format("llama.cpp: tensor '%s' is missing from model", name.c_str())));
-        }
-        gguf_load_tensor & lt = tensors_map.tensors.at(it->second);
-        if (lt.ne != ne) {
-            throw std::runtime_error(format("llama.cpp: tensor '%s' has wrong shape; expected %s, got %s",
-                name.c_str(), llama_format_tensor_shape(ne).c_str(), llama_format_tensor_shape(lt.ne).c_str()));
-        }
-
-        return get_tensor_for(lt, backend);
-    }
-
     struct ggml_tensor * get_tensor_for(gguf_load_tensor & lt, ggml_backend backend) {
         struct ggml_tensor * tensor;
         if (backend != GGML_BACKEND_CPU) {

@@ -959,16 +944,41 @@ struct llama_model_loader {
         return tensor;
     }
 
+    struct ggml_tensor * get_tensor(const std::string & name, const std::vector<uint32_t> & ne, ggml_backend backend) {
+        auto it = tensors_map.name_to_idx.find(name);
+        if (it == tensors_map.name_to_idx.end()) {
+            throw std::runtime_error(std::runtime_error(format("llama.cpp: tensor '%s' is missing from model", name.c_str())));
+        }
+        gguf_load_tensor & lt = tensors_map.tensors.at(it->second);
+        if (lt.ne != ne) {
+            throw std::runtime_error(format("llama.cpp: tensor '%s' has wrong shape; expected %s, got %s",
+                name.c_str(), llama_format_tensor_shape(ne).c_str(), llama_format_tensor_shape(lt.ne).c_str()));
+        }
+
+        return get_tensor_for(lt, backend);
+    }
+
     void done_getting_tensors() const {
         if (num_ggml_tensors_created != tensors_map.tensors.size()) {
             throw std::runtime_error(std::string("llama.cpp: file contained more tensors than expected"));
         }
     }
 
-    void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, gguf_mlock * lmlock) {
-        size_t data_size = 0;
+    void load_data_for(gguf_load_tensor & lt) const {
+        if (use_mmap) {
+            lt.data = (uint8_t *) mapping->addr + lt.file_off;
+        } else {
+            gguf_file & file = file_loader->file;
+            file.seek(lt.file_off, SEEK_SET);
+            file.read_raw(lt.data, lt.size);
+        }
+    }
+
+    void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, gguf_mlock * lmlock) {
+        size_t data_size = 0;
         size_t prefetch_size = 0;
-        size_t lock_size = 0;
+        size_t lock_size = 0;
+
         for (const gguf_load_tensor & lt : tensors_map.tensors) {
             data_size += lt.size;
             if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) {

@@ -1030,31 +1040,6 @@ struct llama_model_loader {
                 done_size += lt.size;
             }
         }
-
-    void load_data_for(gguf_load_tensor & lt) {
-        if (use_mmap) {
-            lt.data = (uint8_t *) mapping->addr + lt.file_off;
-        } else {
-            gguf_file & file = file_loader->file;
-            file.seek(lt.file_off, SEEK_SET);
-            file.read_raw(lt.data, lt.size);
-        }
-
-        if (0) {
-            print_checksum(lt);
-        }
-    }
-
-    static void print_checksum(gguf_load_tensor & lt) {
-        uint32_t sum = 0;
-        for (size_t i = 0; i < lt.size; i++) {
-            uint8_t byte = lt.data[i];
-            sum = byte + (sum << 6) + (sum << 16) - sum; // sdbm hash
-        }
-        fprintf(stderr, "%s checksum: %#08x (%s, size %zu)\n", lt.name.c_str(), sum,
-            llama_format_tensor_shape(lt.ne).c_str(), lt.size);
-    }
-
 };
 
 //
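
For reference, the print_checksum() debugging helper deleted above hashed the raw tensor bytes with the classic sdbm recurrence. A standalone copy of the same checksum, since the helper is now gone from the loader:

```cpp
// Standalone copy of the sdbm-style checksum from the removed
// print_checksum() helper: sum = byte + (sum << 6) + (sum << 16) - sum.
#include <cstddef>
#include <cstdint>

static uint32_t sdbm_checksum(const uint8_t * data, size_t size) {
    uint32_t sum = 0;
    for (size_t i = 0; i < size; i++) {
        sum = data[i] + (sum << 6) + (sum << 16) - sum;
    }
    return sum;
}
```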
@@ -1187,15 +1172,15 @@ int64_t llama_time_us() {
 // model loading
 //
 
-static const char *gguf_file_version_name(gguf_file_version version) {
+static const char * gguf_file_version_name(gguf_file_version version) {
     switch (version) {
         case GGUF_FILE_VERSION_V1: return "GGUF V1 (latest)";
-    }
+    }
 
     return "unknown";
 }
 
-static const char *llama_ftype_name(enum llama_ftype ftype) {
+static const char * llama_ftype_name(enum llama_ftype ftype) {
     switch (ftype) {
         case LLAMA_FTYPE_ALL_F32: return "all F32";
         case LLAMA_FTYPE_MOSTLY_F16: return "mostly F16";

@@ -1220,10 +1205,10 @@ static const char *llama_ftype_name(enum llama_ftype ftype) {
     }
 }
 
-static const char *llama_model_type_name(e_model type) {
+static const char * llama_model_type_name(e_model type) {
     switch (type) {
-        case MODEL_3B: return "3B";
-        case MODEL_7B: return "7B";
+        case MODEL_3B: return "3B";
+        case MODEL_7B: return "7B";
         case MODEL_13B: return "13B";
         case MODEL_30B: return "30B";
         case MODEL_65B: return "65B";

@@ -2996,9 +2981,8 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
         }
     }
 
-    const auto rejects =
-        llama_grammar_reject_candidates(grammar->rules, grammar->stacks, candidates_grammar);
-    for (auto & reject : rejects) {
+    const auto rejects = llama_grammar_reject_candidates(grammar->rules, grammar->stacks, candidates_grammar);
+    for (const auto & reject : rejects) {
         candidates->data[reject.index].logit = -INFINITY;
     }
 

@@ -3725,7 +3709,7 @@ void llama_free(struct llama_context * ctx) {
 int llama_model_quantize(
         const char * fname_inp,
         const char * fname_out,
-        const llama_model_quantize_params *params) {
+        const llama_model_quantize_params * params) {
     try {
         llama_model_quantize_internal(fname_inp, fname_out, params);
         return 0;

@@ -4343,8 +4327,7 @@ static bool llama_load_session_file_internal(struct llama_context * ctx, const c
     GGML_UNUSED(n_token_capacity);
     GGML_UNUSED(n_token_count_out);
 
-
-    // TODO: implement with GGUF format
+    // TODO: implement with GGUF format
     return true;
 }
 

@@ -4389,7 +4372,6 @@ int llama_eval(
     return 0;
 }
 
-
 int llama_eval_embd(
         struct llama_context * ctx,
         const float * embd,

gguf-util.h

Lines changed: 1 addition & 0 deletions

@@ -122,6 +122,7 @@ struct gguf_file {
 
     template<typename T>
     void write_val(const std::string & key, enum gguf_type type, const T & val) {
+        static_assert(std::is_fundamental<T>::value, "T must be a primitive type");
         write_str(key);
         fwrite((const char *) &type, sizeof(type), 1, fp);
         fwrite((const char *) &val, sizeof(val), 1, fp);
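The added static_assert turns misuse of write_val() into a compile-time error: because the template writes sizeof(val) raw bytes with fwrite, it is only well-defined for primitive types, which is also why the saver above switched strings over to write_str(). A minimal, self-contained demonstration of the trait the guard relies on (independent of gguf_file):

```cpp
// std::is_fundamental accepts arithmetic types and rejects class types,
// so write_val<std::string> would now fail to compile.
#include <cstdint>
#include <string>
#include <type_traits>

static_assert(std::is_fundamental<bool>::value,         "bool is a primitive type");
static_assert(std::is_fundamental<uint32_t>::value,     "uint32_t is a primitive type");
static_assert(!std::is_fundamental<std::string>::value, "std::string is not a primitive type");
```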
