From 1b8762e38e785cfd5da8cfb639e664aac4a0d36d Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Tue, 11 Feb 2025 08:13:01 +0000
Subject: [PATCH 1/2] Done

Signed-off-by: Jee Jee Li
---
 vllm/lora/models.py                     | 8 +++++++-
 vllm/lora/punica_wrapper/punica_base.py | 2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/vllm/lora/models.py b/vllm/lora/models.py
index ef77fd4b74ce..b7403980d0b0 100644
--- a/vllm/lora/models.py
+++ b/vllm/lora/models.py
@@ -5,7 +5,8 @@
 import os
 import re
 from dataclasses import dataclass, field
-from typing import Any, Callable, Dict, List, Optional, Sequence, Type, Union
+from typing import (Any, Callable, Dict, List, Optional, Sequence, Set, Type,
+                    Union)
 
 import safetensors.torch
 import torch
@@ -619,12 +620,14 @@ def _register_packed_modules(self, module_full_name: str) -> None:
     def _create_merged_loras_inplace(self, lora_model: LoRAModel) -> None:
         for module_name, new_module_names in self.packed_modules.items():
             replacement_loras: List[Optional[LoRALayerWeights]] = []
+            replaced_module: Set[str] = set()
             has_replacement = False
             for r in new_module_names:
                 lora = lora_model.get_lora(r)
                 replacement_loras.append(lora)
                 if lora:
                     has_replacement = True
+                    replaced_module.add(r)
             if not has_replacement:
                 continue
             for i in range(len(replacement_loras)):
@@ -633,6 +636,9 @@ def _create_merged_loras_inplace(self, lora_model: LoRAModel) -> None:
                 replacement_loras[i] = None
             lora_model.loras[module_name] = PackedLoRALayerWeights.pack(
                 replacement_loras)
+            # Remove the modules that have been replaced.
+            for module in replaced_module:
+                lora_model.loras.pop(module, None)
 
     def deactivate_adapter(self, adapter_id: int) -> bool:
         return deactivate_adapter(adapter_id, self._active_adapters,
diff --git a/vllm/lora/punica_wrapper/punica_base.py b/vllm/lora/punica_wrapper/punica_base.py
index 1a2282ae9acc..dad98f8e2122 100644
--- a/vllm/lora/punica_wrapper/punica_base.py
+++ b/vllm/lora/punica_wrapper/punica_base.py
@@ -147,7 +147,7 @@ def __init__(self, max_num_batched_tokens: int, max_batches: int,
                                              dtype=torch.long,
                                              device=device)
 
-        # 5 is the number of indicies tensors.
+        # 5 is the number of indices tensors.
         # base_indices, sampler_indices, sampler_indices_padded,
         # embeddings_indices,long_lora_indices
         self.indices_len: List[Optional[int]] = [None] * 5

From d8b277343f72cb4782367085274e7dca63b3cf58 Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Wed, 12 Feb 2025 10:43:36 +0000
Subject: [PATCH 2/2] Done

Signed-off-by: Jee Jee Li
---
 tests/lora/test_lora_manager.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/tests/lora/test_lora_manager.py b/tests/lora/test_lora_manager.py
index 6666f54fdebd..9fecd11f57af 100644
--- a/tests/lora/test_lora_manager.py
+++ b/tests/lora/test_lora_manager.py
@@ -606,20 +606,26 @@ def test_packed_loras(dist_init, dummy_model_gate_up, device):
     assert isinstance(model.get_submodule("gate_up_proj"),
                       MergedColumnParallelLinearWithLoRA)
 
+    # Verify packed lora is correct
+    model_lora_clone = model_lora.clone(1)
+    model_lora_clone1 = model_lora1.clone(1)
     assert manager.add_adapter(model_lora)
     assert manager.add_adapter(model_lora1)
+    assert model_lora.get_lora("gate_proj") is None
+    assert model_lora.get_lora("up_proj") is None
+    assert model_lora1.get_lora("up_proj") is None
 
     packed_lora = model_lora.get_lora("gate_up_proj")
     assert packed_lora and isinstance(packed_lora, PackedLoRALayerWeights)
 
     torch.testing.assert_close(packed_lora.lora_a[0],
-                               model_lora.get_lora("gate_proj").lora_a)
+                               model_lora_clone.get_lora("gate_proj").lora_a)
     torch.testing.assert_close(packed_lora.lora_b[0],
-                               model_lora.get_lora("gate_proj").lora_b)
+                               model_lora_clone.get_lora("gate_proj").lora_b)
     torch.testing.assert_close(packed_lora.lora_a[1],
-                               model_lora.get_lora("up_proj").lora_a)
+                               model_lora_clone.get_lora("up_proj").lora_a)
     torch.testing.assert_close(packed_lora.lora_b[1],
-                               model_lora.get_lora("up_proj").lora_b)
+                               model_lora_clone.get_lora("up_proj").lora_b)
 
     packed_lora1 = model_lora1.get_lora("gate_up_proj")
     assert packed_lora1 and isinstance(packed_lora1, PackedLoRALayerWeights)
@@ -627,6 +633,6 @@ def test_packed_loras(dist_init, dummy_model_gate_up, device):
     assert packed_lora1.lora_a[0] is None
     assert packed_lora1.lora_b[0] is None
     torch.testing.assert_close(packed_lora1.lora_a[1],
-                               model_lora1.get_lora("up_proj").lora_a)
+                               model_lora_clone1.get_lora("up_proj").lora_a)
     torch.testing.assert_close(packed_lora1.lora_b[1],
-                               model_lora1.get_lora("up_proj").lora_b)
+                               model_lora_clone1.get_lora("up_proj").lora_b)
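
Reviewer note: the sketch below is a minimal, self-contained illustration of the
merge-then-cleanup behavior PATCH 1/2 adds to _create_merged_loras_inplace. Plain
dicts and a hypothetical pack_modules helper stand in for vLLM's LoRAModel.loras
and PackedLoRALayerWeights.pack; only the control flow mirrors the patched code.

# Simplified stand-in for the merge-then-cleanup logic added in PATCH 1/2.
# `pack_modules`, the plain-dict `loras`, and the tuple used in place of
# PackedLoRALayerWeights.pack are illustrative placeholders, not vLLM APIs.
from typing import Dict, List, Optional, Set


def pack_modules(loras: Dict[str, object],
                 packed_modules: Dict[str, List[str]]) -> None:
    """Pack sub-module LoRAs under the merged name, then drop the originals."""
    for module_name, new_module_names in packed_modules.items():
        replacement_loras: List[Optional[object]] = []
        replaced_module: Set[str] = set()
        has_replacement = False
        for r in new_module_names:
            lora = loras.get(r)
            replacement_loras.append(lora)
            if lora:
                has_replacement = True
                replaced_module.add(r)
        if not has_replacement:
            continue
        # Stand-in for PackedLoRALayerWeights.pack(replacement_loras).
        loras[module_name] = tuple(replacement_loras)
        # Remove the modules that have been replaced (the new behavior).
        for module in replaced_module:
            loras.pop(module, None)


loras = {"gate_proj": "gate_w", "up_proj": "up_w", "o_proj": "o_w"}
pack_modules(loras, {"gate_up_proj": ["gate_proj", "up_proj"]})
assert "gate_up_proj" in loras   # packed entry exists under the merged name
assert "gate_proj" not in loras  # replaced sub-module entries are removed
assert "up_proj" not in loras

Because the replaced entries are popped after packing, the updated test in
PATCH 2/2 clones each adapter before add_adapter() and compares the packed
weights against the clones rather than against the now-removed "gate_proj" and
"up_proj" entries.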