We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 42062cc commit 56e82d0 (Copy full SHA for 56e82d0)
examples/server/server.cpp
@@ -1959,7 +1959,7 @@ struct server_context {
1959
1960
// the update_slots() logic will always submit a maximum of n_batch or n_parallel tokens
1961
// note that n_batch can be > n_ctx (e.g. for non-causal attention models such as BERT where the KV cache is not used)
1962
- batch.clear();
+ batch = llama_batch_ext_ptr(ctx);
1963
metrics.init();
1964
}
1965
0 commit comments