fix server: replace `batch.clear()` with `batch = llama_batch_ext_ptr(ctx)`

ngxson · ngxson · commit 56e82d024435 · 2025-03-25T19:16:19.000+01:00
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
@@ -1959,7 +1959,7 @@ struct server_context {
 
         // the update_slots() logic will always submit a maximum of n_batch or n_parallel tokens
         // note that n_batch can be > n_ctx (e.g. for non-causal attention models such as BERT where the KV cache is not used)
-        batch.clear();
+        batch = llama_batch_ext_ptr(ctx);
         metrics.init();
     }
 

Original file line number	Diff line number	Diff line change
`@@ -1959,7 +1959,7 @@ struct server_context {`
`1959`	`1959`
`1960`	`1960`	`// the update_slots() logic will always submit a maximum of n_batch or n_parallel tokens`
`1961`	`1961`	`// note that n_batch can be > n_ctx (e.g. for non-causal attention models such as BERT where the KV cache is not used)`
`1962`		`- batch.clear();`
	`1962`	`+ batch = llama_batch_ext_ptr(ctx);`
`1963`	`1963`	`metrics.init();`
`1964`	`1964`	`}`
`1965`	`1965`