@@ -408,16 +408,15 @@ static results_perplexity perplexity_v2(llama_context * ctx, const common_params
408
408
// clear the KV cache
409
409
llama_kv_cache_clear (ctx);
410
410
411
+ llama_batch batch = llama_batch_init (n_batch, 0 , 1 );
412
+
411
413
for (int j = 0 ; j < num_batches; ++j) {
412
414
const int batch_start = start + j * n_batch;
413
415
const int batch_size = std::min (end - batch_start, n_batch);
414
416
415
- llama_batch batch = llama_batch_init (batch_size, 0 , 1 );
417
+ common_batch_clear (batch );
416
418
for (int i = 0 ; i < batch_size; i++) {
417
- batch. token[i] = tokens[batch_start + i];
418
- batch. pos[i] = j*n_batch + i;
419
- batch.logits [i] = true ;
420
- batch.seq_id [i][0 ] = 0 ;
419
+ common_batch_add (batch, tokens[batch_start + i], j*n_batch + i, {0 }, true );
421
420
}
422
421
423
422
// LOG_DBG(" Batch %d: starts at %d, size is %d, n_past is %d\n",j,batch_start,batch_size,j * n_batch);
@@ -427,8 +426,6 @@ static results_perplexity perplexity_v2(llama_context * ctx, const common_params
427
426
return {tokens, -1 , logit_history, prob_history};
428
427
}
429
428
430
- llama_batch_free (batch);
431
-
432
429
// save original token and restore it after eval
433
430
const auto token_org = tokens[batch_start];
434
431
@@ -445,6 +442,8 @@ static results_perplexity perplexity_v2(llama_context * ctx, const common_params
445
442
}
446
443
}
447
444
445
+ llama_batch_free (batch);
446
+
448
447
const auto t_end = std::chrono::high_resolution_clock::now ();
449
448
450
449
if (i == 0 ) {
0 commit comments