server : fix non-transformer logic + remove system_prompt field from /props response

ggerganov · ggerganov · commit 0db72b63f54e · 2024-10-12T09:21:41.000+03:00
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
@@ -2082,10 +2082,13 @@ struct server_context {
 
                     // keep only the common part
                     int p0 = slot.n_past;
+
                     if (!llama_kv_cache_seq_rm(ctx, slot.id + 1, p0, -1)) {
                         // could not partially delete (likely using a non-Transformer model)
                         llama_kv_cache_seq_rm(ctx, slot.id + 1, -1, -1);
 
+                        p0 = 0;
+
                         // there is no common part left
                         slot.n_past = 0;
                         slot.n_past_se = 0;
@@ -2773,7 +2776,6 @@ int main(int argc, char ** argv) {
 
     const auto handle_props = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
         json data = {
-            { "system_prompt",               "[unavailable]" },
             { "default_generation_settings", ctx_server.default_generation_settings_for_props },
             { "total_slots",                 ctx_server.params.n_parallel },
             { "chat_template",               llama_get_chat_template(ctx_server.model) },