@@ -78,7 +78,7 @@ def parse_args() -> argparse.Namespace:
 with open(dir_model / "config.json", "r", encoding="utf-8") as f:
     hparams = json.load(f)

-if hparams["architectures"][0] != "FalconForCausalLM":
+if hparams["architectures"][0] not in ("RWForCausalLM", "FalconForCausalLM"):
     print("Model architecture not supported: " + hparams["architectures"][0])

     sys.exit(1)
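For context: pre-transformers Falcon checkpoints ship custom "RW" modeling code and report `RWForCausalLM` in `config.json`, with different hyperparameter key names than the current `FalconForCausalLM` port. A minimal sketch of the two config shapes this check now accepts (field values are illustrative placeholders, not copied from a real checkpoint):

```python
# Illustrative config.json contents for the two Falcon variants;
# values are placeholders, not taken from any specific checkpoint.
old_style = {
    "architectures": ["RWForCausalLM"],      # custom "RW" modeling code
    "n_layer": 32,
    "n_head": 71,
    "n_head_kv": 1,
}
new_style = {
    "architectures": ["FalconForCausalLM"],  # transformers-native port
    "num_hidden_layers": 32,
    "num_attention_heads": 71,
    "num_kv_heads": 1,
}

for hparams in (old_style, new_style):
    assert hparams["architectures"][0] in ("RWForCausalLM", "FalconForCausalLM")
```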
@@ -97,19 +97,26 @@ def parse_args() -> argparse.Namespace:

 print("gguf: get model metadata")

-block_count = hparams["num_hidden_layers"]
+block_count = hparams.get("num_hidden_layers")
+if block_count is None:
+    block_count = hparams["n_layer"] # old name
+
+n_head = hparams.get("num_attention_heads")
+if n_head is None:
+    n_head = hparams["n_head"] # old name
+
+n_head_kv = hparams.get("num_kv_heads")
+if n_head_kv is None:
+    n_head_kv = hparams.get("n_head_kv", 1) # old name

 gguf_writer.add_name("Falcon")
 gguf_writer.add_context_length(2048) # not in config.json
 gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform
 gguf_writer.add_embedding_length(hparams["hidden_size"])
 gguf_writer.add_feed_forward_length(4 * hparams["hidden_size"])
 gguf_writer.add_block_count(block_count)
-gguf_writer.add_head_count(hparams["num_attention_heads"])
-if "num_kv_heads" in hparams:
-    gguf_writer.add_head_count_kv(hparams["num_kv_heads"])
-else:
-    gguf_writer.add_head_count_kv(1)
+gguf_writer.add_head_count(n_head)
+gguf_writer.add_head_count_kv(n_head_kv)

 gguf_writer.add_layer_norm_eps(hparams["layer_norm_epsilon"])
 gguf_writer.add_file_type(ftype)

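The same look-up-with-fallback pattern now appears three times. If more renamed keys turn up, it could be folded into a small helper along these lines (a hypothetical refactor sketch, not something this patch adds; `get_hparam` is a made-up name):

```python
def get_hparam(hparams: dict, names: tuple, default=None):
    # Return the value of the first key present in hparams (new name
    # first, old name second); fall back to `default` if neither exists.
    for name in names:
        if name in hparams:
            return hparams[name]
    if default is None:
        raise KeyError(f"none of {names} found in config.json")
    return default

block_count = get_hparam(hparams, ("num_hidden_layers", "n_layer"))
n_head      = get_hparam(hparams, ("num_attention_heads", "n_head"))
n_head_kv   = get_hparam(hparams, ("num_kv_heads", "n_head_kv"), default=1)
```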
@@ -152,10 +159,6 @@ def parse_args() -> argparse.Namespace:

 tensor_map = gguf.get_tensor_name_map(ARCH,block_count)

-# params for qkv transform
-n_head = hparams["num_attention_heads"]
-n_head_kv = hparams["num_kv_heads"] if "num_kv_heads" in hparams else 1
-
 head_dim = hparams["hidden_size"] // n_head

 # tensor info
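`head_dim` is the per-head width used by the qkv tensor transform below; reusing `n_head` from the metadata section instead of re-deriving it keeps the two in sync. As a worked example, plugging in Falcon-7B's published hyperparameters (hidden_size 4544, 71 heads, multi-query attention so one kv head):

```python
# Worked example with Falcon-7B's published hyperparameters.
hidden_size = 4544
n_head      = 71
n_head_kv   = 1   # multi-query attention: one shared k/v head

head_dim = hidden_size // n_head
assert head_dim == 64

# The fused query_key_value projection holds n_head query heads plus
# one key and one value head per kv group:
qkv_rows = (n_head + 2 * n_head_kv) * head_dim
assert qkv_rows == 4672   # matches Falcon-7B's 4672 x 4544 qkv weight
```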