Skip to content

Commit 0f479b8

Browse files
committed
Merge branch 'master' into xsn/mtmd_fix_pub_header
2 parents ed4127c + df0c0c7 commit 0f479b8

File tree

74 files changed

+3639
-3562
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

74 files changed

+3639
-3562
lines changed

.editorconfig

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,6 @@ charset = unset
4949
trim_trailing_whitespace = unset
5050
insert_final_newline = unset
5151

52-
[tools/mtmd/vendor/miniaudio.h]
52+
[vendor/miniaudio/miniaudio.h]
5353
trim_trailing_whitespace = unset
5454
insert_final_newline = unset

common/CMakeLists.txt

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -58,23 +58,20 @@ add_library(${TARGET} STATIC
5858
arg.cpp
5959
arg.h
6060
base64.hpp
61-
chat.cpp
62-
chat.h
6361
chat-parser.cpp
6462
chat-parser.h
63+
chat.cpp
64+
chat.h
6565
common.cpp
6666
common.h
6767
console.cpp
6868
console.h
69-
json-schema-to-grammar.cpp
70-
json.hpp
71-
json-partial.h
7269
json-partial.cpp
70+
json-partial.h
71+
json-schema-to-grammar.cpp
7372
llguidance.cpp
7473
log.cpp
7574
log.h
76-
minja/chat-template.hpp
77-
minja/minja.hpp
7875
ngram-cache.cpp
7976
ngram-cache.h
8077
regex-partial.cpp
@@ -147,7 +144,7 @@ if (LLAMA_LLGUIDANCE)
147144
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
148145
endif ()
149146

150-
target_include_directories(${TARGET} PUBLIC .)
147+
target_include_directories(${TARGET} PUBLIC . ../vendor)
151148
target_compile_features (${TARGET} PUBLIC cxx_std_17)
152149
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
153150

common/arg.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
1-
#include "gguf.h" // for reading GGUF splits
21
#include "arg.h"
32

3+
#include "chat.h"
44
#include "common.h"
5+
#include "gguf.h" // for reading GGUF splits
6+
#include "json-schema-to-grammar.h"
57
#include "log.h"
68
#include "sampling.h"
7-
#include "chat.h"
89

910
// fix problem with std::min and std::max
1011
#if defined(_WIN32)
@@ -15,6 +16,9 @@
1516
#include <windows.h>
1617
#endif
1718

19+
#define JSON_ASSERT GGML_ASSERT
20+
#include <nlohmann/json.hpp>
21+
1822
#include <algorithm>
1923
#include <climits>
2024
#include <cstdarg>
@@ -34,8 +38,6 @@
3438
#include <future>
3539
#endif
3640

37-
#include "json-schema-to-grammar.h"
38-
3941
using json = nlohmann::ordered_json;
4042

4143
std::initializer_list<enum llama_example> mmproj_examples = {

common/chat-parser.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,10 @@
22

33
#include "chat.h"
44
#include "json-partial.h"
5-
#include "json.hpp"
65
#include "regex-partial.h"
76

7+
#include <nlohmann/json.hpp>
8+
89
#include <optional>
910
#include <string>
1011
#include <vector>

common/chat.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,14 @@
11
#include "chat.h"
22
#include "chat-parser.h"
33
#include "common.h"
4+
#include "json-partial.h"
45
#include "json-schema-to-grammar.h"
56
#include "log.h"
6-
#include "json-partial.h"
7-
#include "minja/chat-template.hpp"
8-
#include "minja/minja.hpp"
97
#include "regex-partial.h"
108

9+
#include <minja/chat-template.hpp>
10+
#include <minja/minja.hpp>
11+
1112
#include <cstdio>
1213
#include <exception>
1314
#include <iostream>
@@ -16,7 +17,6 @@
1617
#include <string>
1718
#include <vector>
1819

19-
2020
static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
2121
auto time = std::chrono::system_clock::to_time_t(now);
2222
auto local_time = *std::localtime(&time);

common/json-partial.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
1-
#include <json-partial.h>
2-
#include "ggml.h"
1+
#include "json-partial.h"
2+
33
#include "log.h"
4-
#include <string>
54

6-
#include <json.hpp>
5+
#include <nlohmann/json.hpp>
6+
7+
#include <string>
78

89
using json = nlohmann::ordered_json;
910

common/json-partial.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
#pragma once
2-
#include <json.hpp>
2+
3+
#include <nlohmann/json.hpp>
34

45
// Healing marker (empty if the JSON was fully parsed / wasn't healed).
56
struct common_healing_marker {

common/json-schema-to-grammar.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
#include "json-schema-to-grammar.h"
22
#include "common.h"
33

4+
#include <nlohmann/json.hpp>
5+
46
#include <algorithm>
5-
#include <fstream>
67
#include <map>
78
#include <regex>
89
#include <sstream>

common/json-schema-to-grammar.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
#pragma once
22

3-
#include "ggml.h"
4-
// Change JSON_ASSERT from assert() to GGML_ASSERT:
5-
#define JSON_ASSERT GGML_ASSERT
6-
#include "json.hpp"
3+
#include <nlohmann/json_fwd.hpp>
4+
5+
#include <functional>
6+
#include <string>
77

88
std::string json_schema_to_grammar(const nlohmann::ordered_json & schema,
99
bool force_gbnf = false);

convert_hf_to_gguf.py

Lines changed: 42 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -523,15 +523,15 @@ def set_gguf_parameters(self):
523523
self.gguf_writer.add_context_length(n_ctx)
524524
logger.info(f"gguf: context length = {n_ctx}")
525525

526-
if (n_embd := self.find_hparam(["hidden_size", "n_embd"], optional=True)) is not None:
526+
if (n_embd := self.find_hparam(["hidden_size", "n_embd", "dim"], optional=True)) is not None:
527527
self.gguf_writer.add_embedding_length(n_embd)
528528
logger.info(f"gguf: embedding length = {n_embd}")
529529

530-
if (n_ff := self.find_hparam(["intermediate_size", "n_inner"], optional=True)) is not None:
530+
if (n_ff := self.find_hparam(["intermediate_size", "n_inner", "hidden_dim"], optional=True)) is not None:
531531
self.gguf_writer.add_feed_forward_length(n_ff)
532532
logger.info(f"gguf: feed forward length = {n_ff}")
533533

534-
if (n_head := self.find_hparam(["num_attention_heads", "n_head"], optional=True)) is not None:
534+
if (n_head := self.find_hparam(["num_attention_heads", "n_head", "n_heads"], optional=True)) is not None:
535535
self.gguf_writer.add_head_count(n_head)
536536
logger.info(f"gguf: head count = {n_head}")
537537

@@ -674,12 +674,12 @@ def get_vocab_base_pre(self, tokenizer) -> str:
674674
if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed":
675675
# ref: https://huggingface.co/tiiuae/falcon-7b
676676
res = "falcon"
677-
if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
678-
# ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
679-
res = "falcon3"
680677
if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
681678
# ref: https://huggingface.co/BAAI/bge-small-en-v1.5
682679
res = "bert-bge"
680+
if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
681+
# ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
682+
res = "falcon3"
683683
if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7":
684684
# ref: https://huggingface.co/BAAI/bge-large-zh-v1.5
685685
res = "bert-bge-large"
@@ -731,9 +731,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
731731
if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
732732
# ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
733733
res = "jina-v2-code"
734-
if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b" or chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516":
735-
# ref: https://huggingface.co/THUDM/glm-4-9b-chat
736-
res = "chatglm-bpe"
737734
if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee":
738735
# ref: https://huggingface.co/LumiOpen/Viking-7B
739736
res = "viking"
@@ -764,9 +761,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
764761
if chkhsh == "60824e3c0d9401f89943cbb2fff727f0e2d4c545ba4df2d6e4f09a6db0f5b450":
765762
# ref: https://huggingface.co/facebook/chameleon-7b
766763
res = "chameleon"
767-
if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
768-
# ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
769-
res = "minerva-7b"
770764
if chkhsh == "8b5a93ed704057481f240da0be7e7dca721d7f8f4755263b6807227a2cbeae65":
771765
# ref: https://huggingface.co/sentence-transformers/stsb-roberta-base
772766
res = "roberta-bpe"
@@ -797,15 +791,24 @@ def get_vocab_base_pre(self, tokenizer) -> str:
797791
if chkhsh == "d353350c764d8c3b39c763113960e4fb4919bea5fbf208a0e3b22e8469dc7406":
798792
# ref: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
799793
res = "llama4"
800-
if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
801-
# ref: https://huggingface.co/THUDM/glm-4-9b-hf
802-
res = "glm4"
803794
if chkhsh == "0e9433cbbb161f89e264eb32e8e64bfe69e834973ffca5d41d3948a604a3e2a3":
804795
# ref: https://huggingface.co/mistral-community/pixtral-12b
805796
res = "pixtral"
806797
if chkhsh == "d5f1dd6f980fec569fb218a81a7658ac45fc56b38c5a0adeb1c232fbe04ef5ec":
807798
# ref: https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base
808799
res = "seed-coder"
800+
if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b":
801+
# ref: https://huggingface.co/THUDM/glm-4-9b-chat
802+
res = "chatglm-bpe"
803+
if chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516":
804+
# ref: https://huggingface.co/THUDM/glm-4-9b-chat
805+
res = "chatglm-bpe"
806+
if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
807+
# ref: https://huggingface.co/THUDM/glm-4-9b-hf
808+
res = "glm4"
809+
if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
810+
# ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
811+
res = "minerva-7b"
809812

810813
if res is None:
811814
logger.warning("\n")
@@ -1044,6 +1047,10 @@ def _set_vocab_rwkv_world(self):
10441047
special_vocab.chat_template = "rwkv-world"
10451048
# hack: Add '\n\n' as the EOT token to make it chat normally
10461049
special_vocab._set_special_token("eot", 261)
1050+
# hack: Override these as they have already been set (incorrectly)
1051+
special_vocab.special_token_ids["bos"] = 0
1052+
special_vocab.special_token_ids["eos"] = 0
1053+
10471054
special_vocab.add_to_gguf(self.gguf_writer)
10481055

10491056
def _set_vocab_builtin(self, model_name: Literal["gpt-neox", "llama-spm"], vocab_size: int):
@@ -3907,6 +3914,26 @@ def _xlmroberta_set_vocab(self) -> None:
39073914
self.gguf_writer.add_add_eos_token(True)
39083915

39093916

3917+
@ModelBase.register("DistilBertModel", "DistilBertForMaskedLM", "DistilBertForSequenceClassification")
3918+
class DistilBertModel(BertModel):
3919+
model_arch = gguf.MODEL_ARCH.BERT
3920+
3921+
def set_gguf_parameters(self):
3922+
self.gguf_writer.add_layer_norm_eps(1e-12)
3923+
logger.info("gguf: layer norm epsilon = 1e-12")
3924+
super().set_gguf_parameters()
3925+
3926+
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
3927+
if name.startswith("distilbert."):
3928+
name = name[11:]
3929+
3930+
# These layers act as MLM head, so we don't need them
3931+
if name.startswith("vocab_"):
3932+
return []
3933+
3934+
return super().modify_tensors(data_torch, name, bid)
3935+
3936+
39103937
@ModelBase.register("RobertaModel", "RobertaForSequenceClassification")
39113938
class RobertaModel(BertModel):
39123939
model_arch = gguf.MODEL_ARCH.BERT

0 commit comments

Comments
 (0)