From ed9ca474acf8177c55eff80acd6d5d3df2bab3b4 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 2 Jul 2025 08:29:55 +0300 Subject: [PATCH 1/2] simple-chat : fix context-exceeded condition ggml-ci --- examples/simple-chat/simple-chat.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/simple-chat/simple-chat.cpp b/examples/simple-chat/simple-chat.cpp index cf1178043d8d1..b0bfc457fc345 100644 --- a/examples/simple-chat/simple-chat.cpp +++ b/examples/simple-chat/simple-chat.cpp @@ -114,14 +114,15 @@ int main(int argc, char ** argv) { // check if we have enough space in the context to evaluate this batch int n_ctx = llama_n_ctx(ctx); int n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ctx), 0); - if (n_ctx_used + batch.n_tokens > n_ctx) { + if (n_ctx_used + batch.n_tokens >= n_ctx) { printf("\033[0m\n"); fprintf(stderr, "context size exceeded\n"); exit(0); } - if (llama_decode(ctx, batch)) { - GGML_ABORT("failed to decode\n"); + int ret = llama_decode(ctx, batch); + if (ret != 0) { + GGML_ABORT("failed to decode, ret = %d\n", ret); } // sample the next token From 6bd73e3e4bad99c31fbe64df989abff198bd5c1b Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 2 Jul 2025 14:08:28 +0300 Subject: [PATCH 2/2] cont : fix n_ctx_used computation ggml-ci --- examples/simple-chat/simple-chat.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/simple-chat/simple-chat.cpp b/examples/simple-chat/simple-chat.cpp index b0bfc457fc345..57195df331628 100644 --- a/examples/simple-chat/simple-chat.cpp +++ b/examples/simple-chat/simple-chat.cpp @@ -113,8 +113,8 @@ int main(int argc, char ** argv) { while (true) { // check if we have enough space in the context to evaluate this batch int n_ctx = llama_n_ctx(ctx); - int n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ctx), 0); - if (n_ctx_used + batch.n_tokens >= n_ctx) { + int n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ctx), 0) + 1; + if (n_ctx_used + batch.n_tokens > n_ctx) { printf("\033[0m\n"); fprintf(stderr, "context size exceeded\n"); exit(0);