cache: keep cached suggestions #18


Merged · 5 commits · Jan 4, 2025
82 changes: 56 additions & 26 deletions autoload/llama.vim
@@ -124,15 +124,15 @@ function! llama#init()

augroup llama
autocmd!
-autocmd InsertEnter * inoremap <expr> <silent> <C-F> llama#fim_inline(v:false)
+autocmd InsertEnter * inoremap <expr> <silent> <C-F> llama#fim_inline(v:false, v:false)
autocmd InsertLeavePre * call llama#fim_cancel()

autocmd CursorMoved * call s:on_move()
autocmd CursorMovedI * call s:on_move()
autocmd CompleteChanged * call llama#fim_cancel()

if g:llama_config.auto_fim
-autocmd CursorMovedI * call llama#fim(v:true)
+autocmd CursorMovedI * call llama#fim(v:true, v:true)
endif

" gather chunks upon yanking
@@ -332,14 +332,14 @@ function! s:ring_update()
endfunction

" necessary for 'inoremap <expr>'
-function! llama#fim_inline(is_auto) abort
-call llama#fim(a:is_auto)
+function! llama#fim_inline(is_auto, cache) abort
+call llama#fim(a:is_auto, a:cache)
return ''
endfunction

" the main FIM call
" takes local context around the cursor and sends it together with the extra context to the server for completion
-function! llama#fim(is_auto) abort
+function! llama#fim(is_auto, cache) abort
" we already have a suggestion for the current cursor position
if s:hint_shown && !a:is_auto
call llama#fim_cancel()
@@ -356,7 +356,7 @@
endif

let s:t_fim_start = reltime()
-let s:timer_fim = timer_start(600, {-> llama#fim(v:true)})
+let s:timer_fim = timer_start(600, {-> llama#fim(v:true, a:cache)})
return
endif

@@ -445,26 +445,53 @@ function! llama#fim(is_auto) abort
endif
let s:job_error = 0

" Construct hash from prefix, suffix, and prompt
let l:request_context = l:prefix . "|" . l:suffix . "|" . l:prompt
let l:hash = sha256(l:request_context)

" Check if the completion is cached
let l:cached_completion = get(g:result_cache, l:hash , v:null)
" Construct hash from prefix, prompt, and suffix
let l:request_context = l:prefix . l:prompt . l:suffix
let l:hash = sha256(l:request_context)

if a:cache
" Check if the completion is cached
let l:cached_completion = get(g:result_cache, l:hash , v:null)

" ... or if there is a cached completion nearby (10 characters behind)
" Looks at the previous 10 characters to see if a completion is cached. If one is found at (x,y)
" then it checks that the characters typed after (x,y) match up with the cached completion result.
if l:cached_completion == v:null
let l:past_text = l:prefix . l:prompt
for i in range(10)
let l:hash_txt = l:past_text[:-(2+i)] . l:suffix
let l:temp_hash = sha256(l:hash_txt)
if has_key(g:result_cache, l:temp_hash)
let l:temp_cached_completion = get(g:result_cache, l:temp_hash)
if l:temp_cached_completion == ""
break
endif
let l:response = json_decode(l:temp_cached_completion)
if l:response['content'][0:len(l:past_text[-(1+i):])-1] !=# l:past_text[-(1+i):]
break
endif
let l:response['content'] = l:response['content'][i+1:]
let g:result_cache[l:hash] = json_encode(l:response)
let l:cached_completion = g:result_cache[l:hash]
break
endif
endfor
endif
endif

-if l:cached_completion != v:null
-call s:fim_on_stdout(l:hash, s:pos_x, s:pos_y, a:is_auto, 0, l:cached_completion)
+if a:cache && l:cached_completion != v:null
+call s:fim_on_stdout(l:hash, a:cache, s:pos_x, s:pos_y, a:is_auto, 0, l:cached_completion)
else
" send the request asynchronously
if s:ghost_text_nvim
let s:current_job = jobstart(l:curl_command, {
-\ 'on_stdout': function('s:fim_on_stdout', [l:hash, s:pos_x, s:pos_y, a:is_auto]),
+\ 'on_stdout': function('s:fim_on_stdout', [l:hash, a:cache, s:pos_x, s:pos_y, a:is_auto]),
\ 'on_exit': function('s:fim_on_exit'),
\ 'stdout_buffered': v:true
\ })
elseif s:ghost_text_vim
let s:current_job = job_start(l:curl_command, {
-\ 'out_cb': function('s:fim_on_stdout', [l:hash, s:pos_x, s:pos_y, a:is_auto]),
+\ 'out_cb': function('s:fim_on_stdout', [l:hash, a:cache, s:pos_x, s:pos_y, a:is_auto]),
\ 'exit_cb': function('s:fim_on_exit')
\ })
endif
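
The walk-back above reads as follows: for each of the last 10 typed characters, re-hash the context as it looked before those characters were typed; if that older request is cached and its completion starts with exactly the characters typed since, trim the already-typed portion and reuse the remainder under the current hash. A minimal standalone sketch of the same logic, assuming a hypothetical s:lookup_nearby helper over a dict keyed by sha256(prefix . prompt . suffix):

" hypothetical helper, not part of this PR: mirrors the walk-back lookup
" a:past_text is prefix . prompt; values in a:cache are raw JSON responses
function! s:lookup_nearby(cache, past_text, suffix) abort
    for i in range(10)
        " re-hash the context as it was before the last i+1 characters
        let l:key = sha256(a:past_text[:-(2+i)] . a:suffix)
        if has_key(a:cache, l:key)
            if a:cache[l:key] ==# ''
                return v:null
            endif
            let l:response = json_decode(a:cache[l:key])
            " the cached completion must begin with the characters typed since
            if l:response['content'][0:i] !=# a:past_text[-(1+i):]
                return v:null
            endif
            " trim what was already typed and reuse the remainder
            let l:response['content'] = l:response['content'][i+1:]
            return l:response
        endif
    endfor
    return v:null
endfunction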
@@ -552,25 +579,28 @@ function! s:on_move()
call llama#fim_cancel()
endfunction

" TODO: Currently the cache uses a random eviction policy. A more clever policy could be implemented (eg. LRU).
function! s:insert_cache(key, value)
if len(keys(g:result_cache)) > (g:llama_config.max_cache_keys - 1)
let l:keys = keys(g:result_cache)
let l:hash = l:keys[rand() % len(l:keys)]
call remove(g:result_cache, l:hash)
endif
let g:result_cache[a:key] = a:value
endfunction

" callback that processes the FIM result from the server and displays the suggestion
-function! s:fim_on_stdout(hash, pos_x, pos_y, is_auto, job_id, data, event = v:null)
+function! s:fim_on_stdout(hash, cache, pos_x, pos_y, is_auto, job_id, data, event = v:null)
" Retrieve the FIM result from cache
-" TODO: Currently the cache uses a random eviction policy. A more clever policy could be implemented (eg. LRU).
-if has_key(g:result_cache, a:hash)
+if a:cache && has_key(g:result_cache, a:hash)
let l:raw = get(g:result_cache, a:hash)
else
if s:ghost_text_nvim
let l:raw = join(a:data, "\n")
elseif s:ghost_text_vim
let l:raw = a:data
endif

-if len(keys(g:result_cache)) > (g:llama_config.max_cache_keys - 1)
-let l:keys = keys(g:result_cache)
-let l:hash = l:keys[rand() % len(l:keys)]
-call remove(g:result_cache, l:hash)
-endif
-let g:result_cache[a:hash] = l:raw
+call s:insert_cache(a:hash, l:raw)
endif

if a:pos_x != col('.') - 1 || a:pos_y != line('.')
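
The new s:insert_cache keeps the random eviction noted in the TODO. As a hedged sketch of the LRU policy the TODO suggests (the s:lru_order list and the s:insert_cache_lru name are hypothetical, not part of this PR):

" hypothetical LRU variant, not part of this PR: evict the oldest key
" instead of a random one; s:lru_order tracks keys from oldest to newest
let s:lru_order = []

function! s:insert_cache_lru(key, value) abort
    " refresh the key's position if it is already cached
    let l:idx = index(s:lru_order, a:key)
    if l:idx >= 0
        call remove(s:lru_order, l:idx)
    elseif len(s:lru_order) >= g:llama_config.max_cache_keys
        " over capacity: drop the least recently inserted entry
        let l:oldest = remove(s:lru_order, 0)
        call remove(g:result_cache, l:oldest)
    endif
    call add(s:lru_order, a:key)
    let g:result_cache[a:key] = a:value
endfunction

A full LRU would also refresh a key's position on cache hits in s:fim_on_stdout, not only on insertion; as written, this sketch is closer to FIFO eviction.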