diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp index e539142e6b8cd..4b58043c5899f 100644 --- a/src/llama-kv-cache-unified.cpp +++ b/src/llama-kv-cache-unified.cpp @@ -738,66 +738,70 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d } llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch, bool cont) const { - if (debug > 0) { - const auto & cells = v_cells[seq_to_stream[1]]; - - const uint32_t head_cur = v_heads[1]; - LLAMA_LOG_DEBUG("%s: n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n", - __func__, cells.used_max_p1(), cells.get_used(), head_cur, get_size(), n_swa); + if (debug > 0) { + for (uint32_t s = 0; s < ubatch.n_seqs_unq; ++s) { + const auto seq_id = ubatch.seq_id_unq[s]; + const auto stream_id = seq_to_stream[seq_id]; + const auto & cells = v_cells[stream_id]; + const uint32_t head_cur = v_heads[stream_id]; + + LLAMA_LOG_DEBUG("%s: stream[%d], n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n", + __func__, stream_id, cells.used_max_p1(), cells.get_used(), head_cur, get_size(), n_swa); + + if ((debug == 2 && n_swa > 0) || debug > 2) { + std::string ss; + for (uint32_t i = 0; i < cells.size(); ++i) { + if (cells.is_empty(i)) { + ss += '.'; + } else { + assert(cells.seq_count(i) >= 1); - if ((debug == 2 && n_swa > 0) || debug > 2) { - std::string ss; - for (uint32_t i = 0; i < cells.size(); ++i) { - if (cells.is_empty(i)) { - ss += '.'; - } else { - assert(cells.seq_count(i) >= 1); + if (cells.seq_count(i) == 1) { + ss += std::to_string(cells.seq_get(i)); + } else { + ss += 'M'; + } + } + if (i%256 == 255) { + ss += " *"; + ss += '\n'; + } + } + LLAMA_LOG_DEBUG("\n%s\n", ss.c_str()); + } - if (cells.seq_count(i) == 1) { - ss += std::to_string(cells.seq_get(i)); + if ((debug == 2 && n_swa > 0) || debug > 2) { + std::string ss; + for (uint32_t i = 0; i < cells.size(); ++i) { + std::string cur; + if (cells.is_empty(i)) { + cur = '.'; } else { - ss += 'M'; + cur = std::to_string(cells.pos_get(i)); + } + const int n = cur.size(); + for (int j = 0; j < 5 - n; ++j) { + cur += ' '; + } + ss += cur; + if (i%256 == 255) { + ss += " *"; + } + if (i%64 == 63) { + ss += '\n'; } } - if (i%256 == 255) { - ss += " *"; - ss += '\n'; - } + LLAMA_LOG_DEBUG("\n%s\n", ss.c_str()); } - LLAMA_LOG_DEBUG("\n%s\n", ss.c_str()); - } - if ((debug == 2 && n_swa > 0) || debug > 2) { - std::string ss; - for (uint32_t i = 0; i < cells.size(); ++i) { - std::string cur; - if (cells.is_empty(i)) { - cur = '.'; - } else { - cur = std::to_string(cells.pos_get(i)); - } - const int n = cur.size(); - for (int j = 0; j < 5 - n; ++j) { - cur += ' '; - } - ss += cur; - if (i%256 == 255) { - ss += " *"; - } - if (i%64 == 63) { - ss += '\n'; + for (int s = 0; s < LLAMA_MAX_SEQ; ++s) { + if (cells.seq_pos_min(s) < 0) { + continue; } - } - LLAMA_LOG_DEBUG("\n%s\n", ss.c_str()); - } - for (int s = 0; s < LLAMA_MAX_SEQ; ++s) { - if (cells.seq_pos_min(s) < 0) { - continue; + LLAMA_LOG_DEBUG("%s: stream[%d] min[%d] = %5d, max[%d] = %5d\n", __func__, stream_id, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s)); } - - LLAMA_LOG_DEBUG("%s: min[%d] = %5d, max[%d] = %5d\n", __func__, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s)); } }