@@ -63,14 +63,15 @@ class Model:
     model_name: str | None
     metadata_override: Path | None
     dir_model_card: Path
+    is_lora: bool

     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH

     def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool = False,
                  use_temp_file: bool = False, eager: bool = False,
                  metadata_override: Path | None = None, model_name: str | None = None,
-                 split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
+                 split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False, is_lora: bool = False):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")

@@ -92,6 +93,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
         self.metadata_override = metadata_override
         self.model_name = model_name
         self.dir_model_card = dir_model  # overridden in convert_lora_to_gguf.py
+        self.is_lora = is_lora  # true if model is used inside convert_lora_to_gguf.py

         # Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
         if self.ftype == gguf.LlamaFileType.GUESSED:
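
Context for reviewers: the flag exists because convert_lora_to_gguf.py reuses these Model subclasses to write adapter files, and generated tensors such as the rope factors guarded below belong to the base model, not to the adapter. A minimal sketch of how the converter can thread the flag through; dir_base_model and the surrounding variables are illustrative, not the converter's actual code:

# Hypothetical caller-side sketch (not the real convert_lora_to_gguf.py code):
model_class = Model.from_model_architecture(hparams["architectures"][0])
model = model_class(dir_base_model, ftype, fname_out,
                    is_big_endian=False, use_temp_file=False, eager=False,
                    is_lora=True)  # adapter conversion: suppress base-model-only tensors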
@@ -1593,7 +1595,8 @@ def prepare_tensors(self):
                     smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
                     rope_factors.append(1 / ((1 - smooth) / factor + smooth))

-            self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))
+            if not self.is_lora:
+                self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))

         super().prepare_tensors()

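The smooth expression in this hunk is the Llama-3-style rope-scaling blend: dimensions whose wavelength falls below the high-frequency threshold keep factor 1, dimensions above the low-frequency threshold get the full factor, and the band in between is interpolated. A self-contained sketch of the per-dimension factor, with the published Llama 3.1 defaults used as assumed inputs and an illustrative helper name:

import math

def llama3_rope_factor(wavelen: float, factor: float = 8.0,
                       low_freq_factor: float = 1.0, high_freq_factor: float = 4.0,
                       old_context_len: int = 8192) -> float:
    # Thresholds are wavelengths relative to the original training context.
    low_freq_wavelen = old_context_len / low_freq_factor
    high_freq_wavelen = old_context_len / high_freq_factor
    if wavelen < high_freq_wavelen:
        return 1.0      # high-frequency dims: left unscaled
    if wavelen > low_freq_wavelen:
        return factor   # low-frequency dims: fully scaled
    smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
    return 1.0 / ((1.0 - smooth) / factor + smooth)  # blended band, continuous at both ends

Calling llama3_rope_factor(2 * math.pi / freq) for each frequency reproduces the rope_factors entries built in the loop above.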
@@ -2140,8 +2143,9 @@ def set_gguf_parameters(self):
         if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
             raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')

-        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
-        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
+        if not self.is_lora:
+            self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
+            self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))


 @Model.register("PlamoForCausalLM")
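
The same guard applied to Phi-3's LongRoPE factor tensors. The length check above holds because LongRoPE supplies one scaling factor per rotary frequency, i.e. per pair of rotated dimensions, hence rope_dims / 2 entries. A short illustration with made-up values; real models carry these in config.json under rope_scaling:

# Hypothetical Phi-3-style values, for illustration only.
rope_dims = 96                                   # rotary dimensions per head
rope_scaling = {"long_factor": [1.0] * 48, "short_factor": [1.0] * 48}

# One factor per frequency, i.e. per (cos, sin) dimension pair:
assert len(rope_scaling["long_factor"]) == rope_dims // 2
assert len(rope_scaling["short_factor"]) == rope_dims // 2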
@@ -3839,7 +3843,8 @@ def prepare_tensors(self):
                     smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
                     rope_factors.append(1 / ((1 - smooth) / factor + smooth))

-            self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))
+            if not self.is_lora:
+                self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))

         super().prepare_tensors()

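With all three sites guarded, a converted adapter should contain only LoRA tensors; the runtime reads rope_freqs and the LongRoPE factors from the base model's GGUF instead. A quick sanity check using the gguf Python package bundled with this repo; the adapter path is a placeholder:

from gguf import GGUFReader  # gguf-py, shipped in the llama.cpp repo

reader = GGUFReader("adapter.gguf")  # placeholder: output of convert_lora_to_gguf.py
names = [t.name for t in reader.tensors]
# Expect no generated base-model tensors in the adapter file:
assert not any(n.startswith(("rope_freqs", "rope_factors")) for n in names)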