@@ -697,12 +697,12 @@ struct gguf_file_saver {
697
697
// we need to calculate the delta in number of bytes written with a counter as a struct member.
698
698
699
699
gguf_file file;
700
- gguf_file_loader * fl;
700
+ gguf_context * ctx; // loaded gguf context (used to re-write the KV section (good enough for now))
701
701
size_t info_offset;
702
702
size_t tensor_offset = 0 ;
703
703
704
- gguf_file_saver (const char * fname, gguf_file_loader * fl )
705
- : file(fname, " wb" ), fl(fl ) {
704
+ gguf_file_saver (const char * fname, gguf_context * ctx )
705
+ : file(fname, " wb" ), ctx(ctx ) {
706
706
fprintf (stderr, " llama.cpp: saving model to %s\n " , fname);
707
707
write_header ();
708
708
write_kv ();
@@ -711,15 +711,15 @@ struct gguf_file_saver {
711
711
// Writes the file header through `file.write_i32`, in this order:
// GGUF_MAGIC, GGUF_VERSION, the tensor count (gguf_get_n_tensors) and the
// key-value count (gguf_get_n_kv).
// The removed (-) lines read both counts through `fl->gguf_ctx`; the added (+)
// lines read them from the `ctx` member directly.
void write_header () {
712
712
file.write_i32 (GGUF_MAGIC);
713
713
file.write_i32 (GGUF_VERSION);
714
- file.write_i32 (gguf_get_n_tensors (fl-> gguf_ctx ));
715
- file.write_i32 (gguf_get_n_kv (fl-> gguf_ctx ));
714
+ file.write_i32 (gguf_get_n_tensors (ctx ));
715
+ file.write_i32 (gguf_get_n_kv (ctx ));
716
716
}
717
717
718
718
void write_kv_arr_str (const std::string & key, enum gguf_type type, int i, int n_arr) {
719
719
std::vector<std::string> data (n_arr);
720
720
721
721
for (int j = 0 ; j < n_arr; ++j) {
722
- std::string val = gguf_get_arr_str (fl-> gguf_ctx , i, j);
722
+ std::string val = gguf_get_arr_str (ctx , i, j);
723
723
data[j] = val;
724
724
}
725
725
@@ -730,7 +730,7 @@ struct gguf_file_saver {
730
730
std::vector<float > data (n_arr);
731
731
732
732
for (int j = 0 ; j < n_arr; ++j) {
733
- float val = gguf_get_arr_f32 (fl-> gguf_ctx , i, j);
733
+ float val = gguf_get_arr_f32 (ctx , i, j);
734
734
data[j] = val;
735
735
}
736
736
@@ -739,28 +739,28 @@ struct gguf_file_saver {
739
739
740
740
// re-write the key-value section from the loaded file
741
741
void write_kv () {
742
- const int32_t n_kv = gguf_get_n_kv (fl-> gguf_ctx );
742
+ const int32_t n_kv = gguf_get_n_kv (ctx );
743
743
for (int i = 0 ; i < n_kv; ++i) {
744
- const char * key = gguf_get_key (fl-> gguf_ctx , i);
744
+ const char * key = gguf_get_key (ctx , i);
745
745
if (strcmp (key, " general.quantization_version" ) == 0 ) {
746
746
file.write_val <uint32_t >(" general.quantization_version" , GGUF_TYPE_UINT32, GGML_QNT_VERSION);
747
747
} else {
748
- const gguf_type vtype = gguf_get_kv_type (fl-> gguf_ctx , i);
748
+ const gguf_type vtype = gguf_get_kv_type (ctx , i);
749
749
750
750
switch (vtype) {
751
- case GGUF_TYPE_BOOL: file.write_val <bool > (key, GGUF_TYPE_BOOL, gguf_get_val_bool (fl-> gguf_ctx , i)); break ;
752
- case GGUF_TYPE_FLOAT32: file.write_val <float > (key, GGUF_TYPE_FLOAT32, gguf_get_val_f32 (fl-> gguf_ctx , i)); break ;
753
- case GGUF_TYPE_INT16: file.write_val <int16_t > (key, GGUF_TYPE_INT16, gguf_get_val_i16 (fl-> gguf_ctx , i)); break ;
754
- case GGUF_TYPE_INT32: file.write_val <int32_t > (key, GGUF_TYPE_INT32, gguf_get_val_i32 (fl-> gguf_ctx , i)); break ;
755
- case GGUF_TYPE_INT8: file.write_val <int8_t > (key, GGUF_TYPE_INT8, gguf_get_val_i8 (fl-> gguf_ctx , i)); break ;
756
- case GGUF_TYPE_STRING: file.write_str (key, GGUF_TYPE_STRING, gguf_get_val_str (fl-> gguf_ctx , i)); break ;
757
- case GGUF_TYPE_UINT16: file.write_val <uint16_t >(key, GGUF_TYPE_UINT16, gguf_get_val_u16 (fl-> gguf_ctx , i)); break ;
758
- case GGUF_TYPE_UINT32: file.write_val <uint32_t >(key, GGUF_TYPE_UINT32, gguf_get_val_u32 (fl-> gguf_ctx , i)); break ;
759
- case GGUF_TYPE_UINT8: file.write_val <uint8_t > (key, GGUF_TYPE_UINT8, gguf_get_val_u8 (fl-> gguf_ctx , i)); break ;
751
+ case GGUF_TYPE_BOOL: file.write_val <bool > (key, GGUF_TYPE_BOOL, gguf_get_val_bool (ctx , i)); break ;
752
+ case GGUF_TYPE_FLOAT32: file.write_val <float > (key, GGUF_TYPE_FLOAT32, gguf_get_val_f32 (ctx , i)); break ;
753
+ case GGUF_TYPE_INT16: file.write_val <int16_t > (key, GGUF_TYPE_INT16, gguf_get_val_i16 (ctx , i)); break ;
754
+ case GGUF_TYPE_INT32: file.write_val <int32_t > (key, GGUF_TYPE_INT32, gguf_get_val_i32 (ctx , i)); break ;
755
+ case GGUF_TYPE_INT8: file.write_val <int8_t > (key, GGUF_TYPE_INT8, gguf_get_val_i8 (ctx , i)); break ;
756
+ case GGUF_TYPE_STRING: file.write_str (key, GGUF_TYPE_STRING, gguf_get_val_str (ctx , i)); break ;
757
+ case GGUF_TYPE_UINT16: file.write_val <uint16_t >(key, GGUF_TYPE_UINT16, gguf_get_val_u16 (ctx , i)); break ;
758
+ case GGUF_TYPE_UINT32: file.write_val <uint32_t >(key, GGUF_TYPE_UINT32, gguf_get_val_u32 (ctx , i)); break ;
759
+ case GGUF_TYPE_UINT8: file.write_val <uint8_t > (key, GGUF_TYPE_UINT8, gguf_get_val_u8 (ctx , i)); break ;
760
760
case GGUF_TYPE_ARRAY:
761
761
{
762
- const gguf_type arr_type = gguf_get_arr_type (fl-> gguf_ctx , i);
763
- const int n_arr = gguf_get_arr_n (fl-> gguf_ctx , i);
762
+ const gguf_type arr_type = gguf_get_arr_type (ctx , i);
763
+ const int n_arr = gguf_get_arr_n (ctx , i);
764
764
if (arr_type == GGUF_TYPE_FLOAT32) {
765
765
write_kv_arr_f32 (key, arr_type, i, n_arr);
766
766
} else if (arr_type == GGUF_TYPE_STRING) {
@@ -777,9 +777,9 @@ struct gguf_file_saver {
777
777
778
778
info_offset = file.tell ();
779
779
780
- GGML_ASSERT (gguf_get_data_offset (fl-> gguf_ctx ) >= info_offset);
780
+ GGML_ASSERT (gguf_get_data_offset (ctx ) >= info_offset);
781
781
782
- size_t count = gguf_get_data_offset (fl-> gguf_ctx ) - info_offset;
782
+ size_t count = gguf_get_data_offset (ctx ) - info_offset;
783
783
file.write_zeros (count);
784
784
file.seek (info_offset, SEEK_SET);
785
785
GGML_ASSERT (info_offset == file.tell ());
@@ -3220,7 +3220,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
3220
3220
}
3221
3221
3222
3222
std::unique_ptr<llama_model_loader> model_loader (new llama_model_loader (fname_inp, /* use_mmap*/ false ));
3223
- gguf_file_saver file_saver (fname_out.c_str (), model_loader->file_loader . get () );
3223
+ gguf_file_saver file_saver (fname_out.c_str (), model_loader->file_loader -> gguf_ctx );
3224
3224
3225
3225
#ifdef GGML_USE_K_QUANTS
3226
3226
int n_attention_wv = 0 ;
0 commit comments