Commit e78f3ef

convert : restore compat with old Falcon models (#3680)
1 parent f3b25e4 commit e78f3ef

1 file changed

convert-falcon-hf-to-gguf.py

Lines changed: 14 additions & 11 deletions
@@ -78,7 +78,7 @@ def parse_args() -> argparse.Namespace:
 with open(dir_model / "config.json", "r", encoding="utf-8") as f:
     hparams = json.load(f)
 
-if hparams["architectures"][0] != "FalconForCausalLM":
+if hparams["architectures"][0] not in ("RWForCausalLM", "FalconForCausalLM"):
     print("Model architecture not supported: " + hparams["architectures"][0])
 
     sys.exit(1)
@@ -97,19 +97,26 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get model metadata")
 
-block_count = hparams["num_hidden_layers"]
+block_count = hparams.get("num_hidden_layers")
+if block_count is None:
+    block_count = hparams["n_layer"]  # old name
+
+n_head = hparams.get("num_attention_heads")
+if n_head is None:
+    n_head = hparams["n_head"]  # old name
+
+n_head_kv = hparams.get("num_kv_heads")
+if n_head_kv is None:
+    n_head_kv = hparams.get("n_head_kv", 1)  # old name
 
 gguf_writer.add_name("Falcon")
 gguf_writer.add_context_length(2048) # not in config.json
 gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform
 gguf_writer.add_embedding_length(hparams["hidden_size"])
 gguf_writer.add_feed_forward_length(4 * hparams["hidden_size"])
 gguf_writer.add_block_count(block_count)
-gguf_writer.add_head_count(hparams["num_attention_heads"])
-if "num_kv_heads" in hparams:
-    gguf_writer.add_head_count_kv(hparams["num_kv_heads"])
-else:
-    gguf_writer.add_head_count_kv(1)
+gguf_writer.add_head_count(n_head)
+gguf_writer.add_head_count_kv(n_head_kv)
 gguf_writer.add_layer_norm_eps(hparams["layer_norm_epsilon"])
 gguf_writer.add_file_type(ftype)
 
@@ -152,10 +159,6 @@ def parse_args() -> argparse.Namespace:
 
 tensor_map = gguf.get_tensor_name_map(ARCH,block_count)
 
-# params for qkv transform
-n_head = hparams["num_attention_heads"]
-n_head_kv = hparams["num_kv_heads"] if "num_kv_heads" in hparams else 1
-
 head_dim = hparams["hidden_size"] // n_head
 
 # tensor info
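
The change amounts to a new-name-then-old-name lookup for each hyperparameter, so both the pre-rename `RWForCausalLM` configs (`n_layer`, `n_head`, `n_head_kv`) and current `FalconForCausalLM` configs (`num_hidden_layers`, `num_attention_heads`, `num_kv_heads`) can be converted. Below is a minimal standalone sketch of that pattern; `load_falcon_hparams` and `pick` are illustrative helper names, not functions from the converter script, and unlike the script (which indexes the old key directly and lets a missing key raise `KeyError`), the sketch simply returns `None` when neither key is present.

```python
# Minimal sketch (not from the repository) of the fallback lookup this commit
# introduces: prefer the new HF-style key, fall back to the old Falcon key.
import json
from pathlib import Path


def load_falcon_hparams(dir_model: Path) -> dict:
    with open(dir_model / "config.json", "r", encoding="utf-8") as f:
        hparams = json.load(f)

    arch = hparams["architectures"][0]
    if arch not in ("RWForCausalLM", "FalconForCausalLM"):
        raise ValueError("Model architecture not supported: " + arch)

    def pick(new_key: str, old_key: str, default=None):
        # Prefer the new key; fall back to the old name, then to a default.
        value = hparams.get(new_key)
        if value is None:
            value = hparams.get(old_key, default)
        return value

    return {
        "block_count": pick("num_hidden_layers", "n_layer"),
        "n_head":      pick("num_attention_heads", "n_head"),
        "n_head_kv":   pick("num_kv_heads", "n_head_kv", default=1),
    }
```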
