Skip to content

Commit 0db72b6

Browse files
committed
server : fix non-transformer logic + remove response from /props
1 parent 9ec6b49 commit 0db72b6

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

examples/server/server.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2082,10 +2082,13 @@ struct server_context {
2082 2082

2083 2083
// keep only the common part
2084 2084
int p0 = slot.n_past;
2085+
2085 2086
if (!llama_kv_cache_seq_rm(ctx, slot.id + 1, p0, -1)) {
2086 2087
// could not partially delete (likely using a non-Transformer model)
2087 2088
llama_kv_cache_seq_rm(ctx, slot.id + 1, -1, -1);
2088 2089

2090+
p0 = 0;
2091+
2089 2092
// there is no common part left
2090 2093
slot.n_past = 0;
2091 2094
slot.n_past_se = 0;
@@ -2773,7 +2776,6 @@ int main(int argc, char ** argv) {
2773 2776

2774 2777
const auto handle_props = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
2775 2778
json data = {
2776-
{ "system_prompt", "[unavailable]" },
2777 2779
{ "default_generation_settings", ctx_server.default_generation_settings_for_props },
2778 2780
{ "total_slots", ctx_server.params.n_parallel },
2779 2781
{ "chat_template", llama_get_chat_template(ctx_server.model) },

0 commit comments

Comments
 (0)