cache: keep cached suggestions #18


Merged · 5 commits · Jan 4, 2025
82 changes: 56 additions & 26 deletions autoload/llama.vim
@@ -124,15 +124,15 @@ function! llama#init()

augroup llama
autocmd!
-autocmd InsertEnter * inoremap <expr> <silent> <C-F> llama#fim_inline(v:false)
+autocmd InsertEnter * inoremap <expr> <silent> <C-F> llama#fim_inline(v:false, v:false)
autocmd InsertLeavePre * call llama#fim_cancel()

autocmd CursorMoved * call s:on_move()
autocmd CursorMovedI * call s:on_move()
autocmd CompleteChanged * call llama#fim_cancel()

if g:llama_config.auto_fim
-autocmd CursorMovedI * call llama#fim(v:true)
+autocmd CursorMovedI * call llama#fim(v:true, v:true)
endif

" gather chunks upon yanking
@@ -332,14 +332,14 @@ function! s:ring_update()
endfunction

" necessary for 'inoremap <expr>'
-function! llama#fim_inline(is_auto) abort
-call llama#fim(a:is_auto)
+function! llama#fim_inline(is_auto, cache) abort
+call llama#fim(a:is_auto, a:cache)
return ''
endfunction

" the main FIM call
" takes local context around the cursor and sends it together with the extra context to the server for completion
-function! llama#fim(is_auto) abort
+function! llama#fim(is_auto, cache) abort
" we already have a suggestion for the current cursor position
if s:hint_shown && !a:is_auto
call llama#fim_cancel()
@@ -356,7 +356,7 @@
endif

let s:t_fim_start = reltime()
-let s:timer_fim = timer_start(600, {-> llama#fim(v:true)})
+let s:timer_fim = timer_start(600, {-> llama#fim(v:true, a:cache)})
return
endif

@@ -445,26 +445,53 @@ function! llama#fim(is_auto) abort
endif
let s:job_error = 0

" Construct hash from prefix, suffix, and prompt
let l:request_context = l:prefix . "|" . l:suffix . "|" . l:prompt
let l:hash = sha256(l:request_context)

" Check if the completion is cached
let l:cached_completion = get(g:result_cache, l:hash , v:null)
" Construct hash from prefix, prompt, and suffix
let l:request_context = l:prefix . l:prompt . l:suffix
let l:hash = sha256(l:request_context)

if a:cache
" Check if the completion is cached
let l:cached_completion = get(g:result_cache, l:hash , v:null)

" ... or if there is a cached completion nearby (10 characters behind)
" Looks at the previous 10 characters to see if a completion is cached. If one is found at (x,y)
" then it checks that the characters typed after (x,y) match up with the cached completion result.
if l:cached_completion == v:null
let l:past_text = l:prefix . l:prompt
for i in range(10)
let l:hash_txt = l:past_text[:-(2+i)] . l:suffix
let l:temp_hash = sha256(l:hash_txt)
if has_key(g:result_cache, l:temp_hash)
let l:temp_cached_completion = get(g:result_cache, l:temp_hash)
if l:temp_cached_completion == ""
break
endif
let l:response = json_decode(l:temp_cached_completion)
if l:response['content'][0:len(l:past_text[-(1+i):])-1] !=# l:past_text[-(1+i):]
break
endif
let l:response['content'] = l:response['content'][i+1:]
let g:result_cache[l:hash] = json_encode(l:response)
let l:cached_completion = g:result_cache[l:hash]
break
endif
endfor
endif
endif

-if l:cached_completion != v:null
-call s:fim_on_stdout(l:hash, s:pos_x, s:pos_y, a:is_auto, 0, l:cached_completion)
+if a:cache && l:cached_completion != v:null
+call s:fim_on_stdout(l:hash, a:cache, s:pos_x, s:pos_y, a:is_auto, 0, l:cached_completion)
else
" send the request asynchronously
if s:ghost_text_nvim
let s:current_job = jobstart(l:curl_command, {
-\ 'on_stdout': function('s:fim_on_stdout', [l:hash, s:pos_x, s:pos_y, a:is_auto]),
+\ 'on_stdout': function('s:fim_on_stdout', [l:hash, a:cache, s:pos_x, s:pos_y, a:is_auto]),
\ 'on_exit': function('s:fim_on_exit'),
\ 'stdout_buffered': v:true
\ })
elseif s:ghost_text_vim
let s:current_job = job_start(l:curl_command, {
-\ 'out_cb': function('s:fim_on_stdout', [l:hash, s:pos_x, s:pos_y, a:is_auto]),
+\ 'out_cb': function('s:fim_on_stdout', [l:hash, a:cache, s:pos_x, s:pos_y, a:is_auto]),
\ 'exit_cb': function('s:fim_on_exit')
\ })
endif
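
The walk-back above reads as follows: for each of the last 10 typed characters, re-hash the context as it looked before those characters were typed; if that older request is cached and its completion starts with exactly the characters typed since, trim the already-typed portion and reuse the remainder under the current hash. A minimal standalone sketch of the same logic, assuming a hypothetical s:lookup_nearby helper over a dict keyed by sha256(prefix . prompt . suffix):

" hypothetical helper, not part of this PR: mirrors the walk-back lookup
" a:past_text is prefix . prompt; values in a:cache are raw JSON responses
function! s:lookup_nearby(cache, past_text, suffix) abort
    for i in range(10)
        " re-hash the context as it was before the last i+1 characters
        let l:key = sha256(a:past_text[:-(2+i)] . a:suffix)
        if has_key(a:cache, l:key)
            if a:cache[l:key] ==# ''
                return v:null
            endif
            let l:response = json_decode(a:cache[l:key])
            " the cached completion must begin with the characters typed since
            if l:response['content'][0:i] !=# a:past_text[-(1+i):]
                return v:null
            endif
            " trim what was already typed and reuse the remainder
            let l:response['content'] = l:response['content'][i+1:]
            return l:response
        endif
    endfor
    return v:null
endfunction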
@@ -552,25 +579,28 @@ function! s:on_move()
call llama#fim_cancel()
endfunction

" TODO: Currently the cache uses a random eviction policy. A more clever policy could be implemented (eg. LRU).
function! s:insert_cache(key, value)
if len(keys(g:result_cache)) > (g:llama_config.max_cache_keys - 1)
let l:keys = keys(g:result_cache)
let l:hash = l:keys[rand() % len(l:keys)]
call remove(g:result_cache, l:hash)
endif
let g:result_cache[a:key] = a:value
endfunction

" callback that processes the FIM result from the server and displays the suggestion
-function! s:fim_on_stdout(hash, pos_x, pos_y, is_auto, job_id, data, event = v:null)
+function! s:fim_on_stdout(hash, cache, pos_x, pos_y, is_auto, job_id, data, event = v:null)
" Retrieve the FIM result from cache
-" TODO: Currently the cache uses a random eviction policy. A more clever policy could be implemented (eg. LRU).
-if has_key(g:result_cache, a:hash)
+if a:cache && has_key(g:result_cache, a:hash)
let l:raw = get(g:result_cache, a:hash)
else
if s:ghost_text_nvim
let l:raw = join(a:data, "\n")
elseif s:ghost_text_vim
let l:raw = a:data
endif

-if len(keys(g:result_cache)) > (g:llama_config.max_cache_keys - 1)
-let l:keys = keys(g:result_cache)
-let l:hash = l:keys[rand() % len(l:keys)]
-call remove(g:result_cache, l:hash)
-endif
-let g:result_cache[a:hash] = l:raw
+call s:insert_cache(a:hash, l:raw)
endif

if a:pos_x != col('.') - 1 || a:pos_y != line('.')
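
The new s:insert_cache keeps the random eviction noted in the TODO. As a hedged sketch of the LRU policy the TODO suggests (the s:lru_order list and the s:insert_cache_lru name are hypothetical, not part of this PR):

" hypothetical LRU variant, not part of this PR: evict the oldest key
" instead of a random one; s:lru_order tracks keys from oldest to newest
let s:lru_order = []

function! s:insert_cache_lru(key, value) abort
    " refresh the key's position if it is already cached
    let l:idx = index(s:lru_order, a:key)
    if l:idx >= 0
        call remove(s:lru_order, l:idx)
    elseif len(s:lru_order) >= g:llama_config.max_cache_keys
        " over capacity: drop the least recently inserted entry
        let l:oldest = remove(s:lru_order, 0)
        call remove(g:result_cache, l:oldest)
    endif
    call add(s:lru_order, a:key)
    let g:result_cache[a:key] = a:value
endfunction

A full LRU would also refresh a key's position on cache hits in s:fim_on_stdout, not only on insertion; as written, this sketch is closer to FIFO eviction.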