@@ -72,6 +72,7 @@ class ModelBase:
     endianess: gguf.GGUFEndian
     use_temp_file: bool
     lazy: bool
+    dry_run: bool
     part_names: list[str]
     is_safetensors: bool
     hparams: dict[str, Any]
@@ -111,6 +112,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
         self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
         self.use_temp_file = use_temp_file
         self.lazy = not eager or (remote_hf_model_id is not None)
+        self.dry_run = dry_run
         self.remote_hf_model_id = remote_hf_model_id
         if remote_hf_model_id is not None:
             self.is_safetensors = True
@@ -4871,11 +4873,35 @@ def modify_tensors(self, data_torch, name, bid):
 @ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
 class XLMRobertaModel(BertModel):
     model_arch = gguf.MODEL_ARCH.BERT
+    _lora_files = {}
+    _lora_names = []
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, **kwargs: Any):
+        hparams = kwargs.pop("hparams", None)
+        if hparams is None:
+            hparams = ModelBase.load_hparams(dir_model, False)
+
+        if lora_names := hparams.get("lora_adaptations"):
+            self._lora_names = lora_names
+            self.model_arch = gguf.MODEL_ARCH.JINA_BERT_V3
+
+        super().__init__(dir_model, ftype, fname_out, hparams=hparams, **kwargs)
         self._xlmroberta_tokenizer_init()
 
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        if self._lora_names:
+            for name in self._lora_names:
+                fname = self.add_prefix_to_filename(self.fname_out, f"lora-{name}-")
+                self._lora_files[name] = gguf.GGUFWriter(fname, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, dry_run=self.dry_run)
+
+        return super().generate_extra_tensors()
+
+    def set_type(self):
+        for lora_writer in self._lora_files.values():
+            lora_writer.add_type(gguf.GGUFType.ADAPTER)
+            lora_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
+        super().set_type()
+
     def set_vocab(self):
         self._xlmroberta_set_vocab()
 
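For context: the loop in `generate_extra_tensors` above opens one extra `gguf.GGUFWriter` per entry in `lora_adaptations`, splicing the task name into the main output filename, and `set_type` later tags each of those writers as a `gguf.GGUFType.ADAPTER`. The sketch below only illustrates the assumed naming behaviour; `derive_adapter_path` is a stand-in for `add_prefix_to_filename`, and the task names and paths are made-up examples rather than values read from any config.

```python
# Sketch only: illustrates how per-task adapter filenames are presumably derived.
from pathlib import Path

def derive_adapter_path(fname_out: Path, prefix: str) -> Path:
    # Assumed behaviour: prepend the prefix to the file name, keep the directory.
    return fname_out.with_name(prefix + fname_out.name)

fname_out = Path("models/jina-embeddings-v3-F16.gguf")
for task in ["retrieval.query", "retrieval.passage", "text-matching"]:
    print(derive_adapter_path(fname_out, f"lora-{task}-"))
# models/lora-retrieval.query-jina-embeddings-v3-F16.gguf
# models/lora-retrieval.passage-jina-embeddings-v3-F16.gguf
# models/lora-text-matching-jina-embeddings-v3-F16.gguf
```

Each task therefore ends up as its own standalone LoRA GGUF written next to the base model file.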
@@ -4885,13 +4911,62 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         if name.startswith("roberta."):
             name = name[8:]
 
+        # jina-embeddings-v3
+        if ".parametrizations." in name:
+            name = name.replace(".parametrizations.", ".")
+            if name.endswith(".original"):
+                name = name[:-9]
+
         # position embeddings start at pad_token_id + 1, so just chop down the weight tensor
         if name == "embeddings.position_embeddings.weight":
             if self._position_offset is not None:
                 data_torch = data_torch[self._position_offset:,:]
 
+        if name.endswith(".0.lora_A") or name.endswith(".0.lora_B"):
+            if name.startswith("pooler.dense"):
+                return []
+
+            num_loras = data_torch.size(0)
+            assert num_loras == len(self._lora_names)
+
+            # Split out each LoRA in their own GGUF
+            for i, lora_writer in enumerate(self._lora_files.values()):
+                new_name = self.map_tensor_name(name[:-9]) + name[-7:].lower()
+                data = data_torch[i, :, :]
+                # Transpose/flip token_embd/types into correct shape
+                if new_name == "token_embd.weight.lora_b":
+                    data = data.T
+                elif new_name.startswith("token_types.weight."):
+                    new_name = new_name[:-1] + ("a" if new_name[-1:] == "b" else "b")
+                lora_writer.add_tensor(new_name, data.float().numpy(), raw_dtype=gguf.GGMLQuantizationType.F32)
+
+            return []
+
         return super().modify_tensors(data_torch, name, bid)
 
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+
+        # jina-embeddings-v3
+        if rotary_emb_base := self.hparams.get("rotary_emb_base"):
+            self.gguf_writer.add_rope_freq_base(rotary_emb_base)
+        lora_alpha = self.hparams.get("lora_alpha")
+        if lora_prompt_prefixes := self.hparams.get("task_instructions"):
+            assert self._lora_files and all(lora_name in lora_prompt_prefixes for lora_name in self._lora_files.keys())
+        for lora_name, lora_writer in self._lora_files.items():
+            lora_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, lora_alpha if lora_alpha is not None else 1.0)
+            lora_writer.add_string(gguf.Keys.Adapter.LORA_TASK_NAME, lora_name)
+            if lora_prompt_prefixes:
+                lora_writer.add_string(gguf.Keys.Adapter.LORA_PROMPT_PREFIX, lora_prompt_prefixes[lora_name])
+
+    def write(self):
+        super().write()
+        for lora_writer in self._lora_files.values():
+            lora_writer.write_header_to_file()
+            lora_writer.write_kv_data_to_file()
+            lora_writer.write_tensors_to_file(progress=True)
+            lora_writer.close()
+
 
 @ModelBase.register("GemmaForCausalLM")
 class GemmaModel(TextModel):
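The splitting logic in `modify_tensors` relies on the checkpoint storing every task's adapter stacked along dimension 0 of a single `.0.lora_A`/`.0.lora_B` tensor. The sketch below replays the slicing and name handling on a dummy tensor; the shapes and tensor name are made up for illustration, and the real converter additionally routes the base name through `map_tensor_name` before writing into the per-task GGUF created in `generate_extra_tensors`.

```python
# Standalone illustration with made-up shapes and names; requires torch.
import torch

lora_names = ["retrieval.query", "retrieval.passage", "text-matching"]
name = "encoder.layer.0.attention.self.query.0.lora_A"

# All task adapters are stacked along dim 0: (num_loras, rank, n_in).
stacked = torch.randn(len(lora_names), 8, 1024)
assert stacked.size(0) == len(lora_names)

suffix = name[-7:].lower()   # ".lora_a" (or ".lora_b")
base = name[:-9]             # drop the trailing ".0.lora_A"
for i, task in enumerate(lora_names):
    data = stacked[i, :, :]  # this task's adapter matrix
    # The converter writes map_tensor_name(base) + suffix into task's GGUF as F32.
    print(task, base + suffix, tuple(data.shape))
```

Per-task metadata (LoRA alpha, task name, and the optional prompt prefix from `task_instructions`) is then attached in `set_gguf_parameters`, and `write` flushes each adapter file after the base model has been written.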