From b6fcaf7a851e120592c33f60e86fec7c2a95c7c9 Mon Sep 17 00:00:00 2001
From: Dowon
Date: Wed, 9 Jul 2025 13:11:09 +0900
Subject: [PATCH] model : add skt/A.X-4.0 model vocabulary

---
 convert_hf_to_gguf.py        | 3 +++
 convert_hf_to_gguf_update.py | 1 +
 src/llama-vocab.cpp          | 3 ++-
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 3f3dfb416c1fc..d2a244c3243ca 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -818,6 +818,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
             # ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
             res = "hunyuan"
+        if chkhsh == "b0a6b1c0bd5998ebd9df08611efde34a4ff03faed45ae09c43e6b31ebd4b94cf":
+            # ref: https://huggingface.co/skt/A.X-4.0
+            res = "a.x-4.0"

         if res is None:
             logger.warning("\n")
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 96a2b692a86c1..44ef666f793a7 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -128,6 +128,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "llama4",     "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
     {"name": "pixtral",    "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
     {"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
+    {"name": "a.x-4.0",    "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/skt/A.X-4.0", },
 ]

 # some models are known to be broken upstream, so we will skip them as exceptions
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 551bba171c0e0..e880c02b19c31 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1555,7 +1555,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "jina-de"    ||
                 tokenizer_pre == "gigachat"   ||
                 tokenizer_pre == "jina-v2-es" ||
-                tokenizer_pre == "jina-v2-de") {
+                tokenizer_pre == "jina-v2-de" ||
+                tokenizer_pre == "a.x-4.0") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
             } else if (
                 tokenizer_pre == "jina-v1-en" ||
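
Note: the chkhsh value hard-coded above is the fingerprint that convert_hf_to_gguf_update.py prints for the new tokenizer; it is the SHA-256 of the token IDs produced by encoding a fixed test string with the Hugging Face tokenizer. The snippet below is a minimal sketch of that flow for sanity-checking a hash locally. It assumes the transformers package is installed and the skt/A.X-4.0 repo is reachable; the test string here is a short stand-in, not the long multilingual string used by the real script, so it will NOT reproduce the exact hash in this patch.

    # Sketch: how a tokenizer checksum like the one above is derived.
    # Assumption: the real convert_hf_to_gguf_update.py uses a long fixed
    # multilingual test string; `chktxt` here is only a placeholder.
    from hashlib import sha256
    from transformers import AutoTokenizer

    chktxt = "Hello world \u00e9\u00e0 1234 test"  # placeholder test string

    tokenizer = AutoTokenizer.from_pretrained("skt/A.X-4.0")
    chktok = tokenizer.encode(chktxt)                       # token IDs for the test string
    chkhsh = sha256(str(chktok).encode()).hexdigest()       # fingerprint of the pre-tokenizer behavior

    print(f"chkhsh = {chkhsh}")  # compare against the value added to convert_hf_to_gguf.py

Running the real convert_hf_to_gguf_update.py after adding the repo to its model list regenerates the get_vocab_base_pre() block, which is how the first hunk of this patch is normally produced.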