Skip to content

Commit f4e85a1

Browse files
ngxsonarthw
authored and committed
lora : fix llama conversion script with ROPE_FREQS (ggml-org#9117)
1 parent 5ff6c1e commit f4e85a1

File tree

3 files changed

+12
-6
lines changed

3 files changed

+12
-6
lines changed

convert_hf_to_gguf.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,15 @@ class Model:
6363
model_name: str | None
6464
metadata_override: Path | None
6565
dir_model_card: Path
66+
is_lora: bool
6667

6768
# subclasses should define this!
6869
model_arch: gguf.MODEL_ARCH
6970

7071
def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool = False,
7172
use_temp_file: bool = False, eager: bool = False,
7273
metadata_override: Path | None = None, model_name: str | None = None,
73-
split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
74+
split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False, is_lora: bool = False):
7475
if type(self) is Model:
7576
raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
7677

@@ -92,6 +93,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
9293
self.metadata_override = metadata_override
9394
self.model_name = model_name
9495
self.dir_model_card = dir_model # overridden in convert_lora_to_gguf.py
96+
self.is_lora = is_lora # true if model is used inside convert_lora_to_gguf.py
9597

9698
# Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
9799
if self.ftype == gguf.LlamaFileType.GUESSED:
@@ -1593,7 +1595,8 @@ def prepare_tensors(self):
15931595
smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
15941596
rope_factors.append(1 / ((1 - smooth) / factor + smooth))
15951597

1596-
self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))
1598+
if not self.is_lora:
1599+
self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))
15971600

15981601
super().prepare_tensors()
15991602

@@ -2140,8 +2143,9 @@ def set_gguf_parameters(self):
21402143
if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
21412144
raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
21422145

2143-
self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
2144-
self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
2146+
if not self.is_lora:
2147+
self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
2148+
self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
21452149

21462150

21472151
@Model.register("PlamoForCausalLM")
@@ -3839,7 +3843,8 @@ def prepare_tensors(self):
38393843
smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
38403844
rope_factors.append(1 / ((1 - smooth) / factor + smooth))
38413845

3842-
self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))
3846+
if not self.is_lora:
3847+
self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))
38433848

38443849
super().prepare_tensors()
38453850

convert_lora_to_gguf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
386386
dry_run=args.dry_run,
387387
dir_lora_model=dir_lora,
388388
lora_alpha=alpha,
389+
is_lora=True,
389390
)
390391

391392
logger.info("Exporting model...")

tests/test-lora-conversion-inference.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ MODELS_REPO_URL=https://huggingface.co/ggml-org/$MODELS_REPO
1414
# Clone the Hugging Face repository if the directory does not exist
1515
if [ ! -d "$MODELS_REPO" ]; then
1616
echo "Cloning the Hugging Face repository..."
17-
git clone $MODELS_REPO_URL
17+
git clone $MODELS_REPO_URL --depth 1
1818
else
1919
echo "Repository already exists. Skipping clone."
2020
fi

0 commit comments

Comments (0)