From b113650f8f4d0e275bef4c2b9e864772e7aad472 Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Thu, 2 Jan 2025 22:08:34 -0600
Subject: [PATCH 1/5] check nearby cached completions

---
 autoload/llama.vim | 51 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 10 deletions(-)

diff --git a/autoload/llama.vim b/autoload/llama.vim
index f9591b9..6417cf8 100644
--- a/autoload/llama.vim
+++ b/autoload/llama.vim
@@ -361,7 +361,6 @@ function! llama#fim(is_auto) abort
     endif
 
     let s:t_fim_start = reltime()
-
     let s:content = []
     let s:can_accept = v:false
 
@@ -445,14 +444,41 @@ function! llama#fim(is_auto) abort
     endif
     let s:job_error = 0
 
-    " Construct hash from prefix, suffix, and prompt
-    let l:request_context = l:prefix . "|" . l:suffix . "|" . l:prompt
+    " Construct hash from prefix, prompt, and suffix
+    let l:request_context = l:prefix . l:prompt . l:suffix
     let l:hash = sha256(l:request_context) 
 
     " Check if the completion is cached
     let l:cached_completion = get(g:result_cache, l:hash , v:null)
 
+
+    " ... or if there is a cached completion nearby (10 characters behind)
+    " Looks at the previous 10 characters to see if a completion is cached. If one is found at (x,y)
+    " then it checks that the characters typed after (x,y) match up with the cached completion result.
+    if l:cached_completion == v:null
+        let l:past_text = l:prefix . l:prompt
+        for i in range(10)
+                let l:hash_txt = l:past_text[:-(2+i)] . l:suffix
+                let l:temp_hash = sha256(l:hash_txt)
+                if has_key(g:result_cache, l:temp_hash)
+                    let l:temp_cached_completion = get(g:result_cache, l:temp_hash)
+                    if  l:temp_cached_completion == ""
+                        break
+                    endif
+                    let l:response = json_decode(l:temp_cached_completion)
+                    if l:response['content'][0:len(l:past_text[-(1+i):])-1] !=# l:past_text[-(1+i):]
+                        break
+                    endif
+                    let l:response['content']  = l:response['content'][i+1:]
+                    let g:result_cache[l:hash] = json_encode(l:response)
+                    let l:cached_completion = g:result_cache[l:hash]
+                    break
+                endif
+        endfor
+    endif
+    
     if l:cached_completion != v:null
+
         call s:fim_on_stdout(l:hash, s:pos_x, s:pos_y, a:is_auto, 0, l:cached_completion)
     else
         " send the request asynchronously
@@ -552,6 +578,15 @@ function! s:on_move()
     call llama#fim_cancel()
 endfunction
 
+function! s:insert_cache(key, value)
+    if len(keys(g:result_cache)) > (g:llama_config.max_cache_keys - 1)
+        let l:keys = keys(g:result_cache)
+        let l:hash = l:keys[rand() % len(l:keys)]
+        call remove(g:result_cache, l:hash)
+    endif
+    let g:result_cache[a:key] = a:value
+endfunction
+
 " callback that processes the FIM result from the server and displays the suggestion
 function! s:fim_on_stdout(hash, pos_x, pos_y, is_auto, job_id, data, event = v:null)
     " Retrieve the FIM result from cache
@@ -564,13 +599,7 @@ function! s:fim_on_stdout(hash, pos_x, pos_y, is_auto, job_id, data, event = v:n
         elseif s:ghost_text_vim
             let l:raw = a:data
         endif
-
-        if len(keys(g:result_cache)) > (g:llama_config.max_cache_keys - 1)
-            let l:keys = keys(g:result_cache)
-            let l:hash = l:keys[rand() % len(l:keys)]
-            call remove(g:result_cache, l:hash)
-        endif
-        let g:result_cache[a:hash] = l:raw
+        call s:insert_cache(a:hash, l:raw)
     endif
 
     if a:pos_x != col('.') - 1 || a:pos_y != line('.')
@@ -607,6 +636,7 @@ function! s:fim_on_stdout(hash, pos_x, pos_y, is_auto, job_id, data, event = v:n
     let l:t_predict_ms = 1.0
     let l:s_predict    = 0
 
+
     " get the generated suggestion
     if s:can_accept
         let l:response = json_decode(l:raw)
@@ -709,6 +739,7 @@ function! s:fim_on_stdout(hash, pos_x, pos_y, is_auto, job_id, data, event = v:n
 
     let s:content[-1] .= s:line_cur_suffix
 
+
     call llama#fim_cancel()
 
     " display virtual text with the suggestion

From 64434e94e4896fba722380fe548a7c3b66f361e8 Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Thu, 2 Jan 2025 22:12:44 -0600
Subject: [PATCH 2/5] coding style

---
 autoload/llama.vim | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/autoload/llama.vim b/autoload/llama.vim
index 6417cf8..08dd05f 100644
--- a/autoload/llama.vim
+++ b/autoload/llama.vim
@@ -361,6 +361,7 @@ function! llama#fim(is_auto) abort
     endif
 
     let s:t_fim_start = reltime()
+
     let s:content = []
     let s:can_accept = v:false
 
@@ -451,7 +452,6 @@ function! llama#fim(is_auto) abort
     " Check if the completion is cached
     let l:cached_completion = get(g:result_cache, l:hash , v:null)
 
-
     " ... or if there is a cached completion nearby (10 characters behind)
     " Looks at the previous 10 characters to see if a completion is cached. If one is found at (x,y)
     " then it checks that the characters typed after (x,y) match up with the cached completion result.
@@ -578,6 +578,7 @@ function! s:on_move()
     call llama#fim_cancel()
 endfunction
 
+" TODO: Currently the cache uses a random eviction policy. A more clever policy could be implemented (eg. LRU).
 function! s:insert_cache(key, value)
     if len(keys(g:result_cache)) > (g:llama_config.max_cache_keys - 1)
         let l:keys = keys(g:result_cache)
@@ -590,7 +591,6 @@ endfunction
 " callback that processes the FIM result from the server and displays the suggestion
 function! s:fim_on_stdout(hash, pos_x, pos_y, is_auto, job_id, data, event = v:null)
     " Retrieve the FIM result from cache
-    " TODO: Currently the cache uses a random eviction policy. A more clever policy could be implemented (eg. LRU).
     if has_key(g:result_cache, a:hash)
         let l:raw = get(g:result_cache, a:hash)
     else
@@ -636,7 +636,6 @@ function! s:fim_on_stdout(hash, pos_x, pos_y, is_auto, job_id, data, event = v:n
     let l:t_predict_ms = 1.0
     let l:s_predict    = 0
 
-
     " get the generated suggestion
     if s:can_accept
         let l:response = json_decode(l:raw)
@@ -739,7 +738,6 @@ function! s:fim_on_stdout(hash, pos_x, pos_y, is_auto, job_id, data, event = v:n
 
     let s:content[-1] .= s:line_cur_suffix
 
-
     call llama#fim_cancel()
 
     " display virtual text with the suggestion

From aa10097d4a616145440064164d304c0e050c07fc Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Thu, 2 Jan 2025 22:21:58 -0600
Subject: [PATCH 3/5] coding style

---
 autoload/llama.vim | 1 -
 1 file changed, 1 deletion(-)

diff --git a/autoload/llama.vim b/autoload/llama.vim
index 08dd05f..41dec8f 100644
--- a/autoload/llama.vim
+++ b/autoload/llama.vim
@@ -478,7 +478,6 @@ function! llama#fim(is_auto) abort
     endif
     
     if l:cached_completion != v:null
-
         call s:fim_on_stdout(l:hash, s:pos_x, s:pos_y, a:is_auto, 0, l:cached_completion)
     else
         " send the request asynchronously

From e5f3160a8974ed3ece89cb50e4884bd9dfa83e76 Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Fri, 3 Jan 2025 10:01:55 -0600
Subject: [PATCH 4/5] Disable cache when generating a new suggestion with CTRL+F

---
 autoload/llama.vim | 82 ++++++++++++++++++++++++----------------------
 1 file changed, 43 insertions(+), 39 deletions(-)

diff --git a/autoload/llama.vim b/autoload/llama.vim
index 41dec8f..2c2bd47 100644
--- a/autoload/llama.vim
+++ b/autoload/llama.vim
@@ -124,7 +124,7 @@ function! llama#init()
 
     augroup llama
         autocmd!
-        autocmd InsertEnter     * inoremap <expr> <silent> <C-F> llama#fim_inline(v:false)
+        autocmd InsertEnter     * inoremap <expr> <silent> <C-F> llama#fim_inline(v:false, v:false)
         autocmd InsertLeavePre  * call llama#fim_cancel()
 
         autocmd CursorMoved     * call s:on_move()
@@ -132,7 +132,7 @@ function! llama#init()
         autocmd CompleteChanged * call llama#fim_cancel()
 
         if g:llama_config.auto_fim
-            autocmd CursorMovedI * call llama#fim(v:true)
+            autocmd CursorMovedI * call llama#fim(v:true, v:true)
         endif
 
         " gather chunks upon yanking
@@ -332,14 +332,14 @@ function! s:ring_update()
 endfunction
 
 " necessary for 'inoremap <expr>'
-function! llama#fim_inline(is_auto) abort
-    call llama#fim(a:is_auto)
+function! llama#fim_inline(is_auto, cache) abort
+    call llama#fim(a:is_auto, a:cache)
     return ''
 endfunction
 
 " the main FIM call
 " takes local context around the cursor and sends it together with the extra context to the server for completion
-function! llama#fim(is_auto) abort
+function! llama#fim(is_auto, cache) abort
     " we already have a suggestion for the current cursor position
     if s:hint_shown && !a:is_auto
         call llama#fim_cancel()
@@ -356,7 +356,7 @@ function! llama#fim(is_auto) abort
         endif
 
         let s:t_fim_start = reltime()
-        let s:timer_fim = timer_start(600, {-> llama#fim(v:true)})
+        let s:timer_fim = timer_start(600, {-> llama#fim(v:true, a:cache)})
         return
     endif
 
@@ -447,49 +447,51 @@ function! llama#fim(is_auto) abort
 
     " Construct hash from prefix, prompt, and suffix
     let l:request_context = l:prefix . l:prompt . l:suffix
-    let l:hash = sha256(l:request_context) 
-
-    " Check if the completion is cached
-    let l:cached_completion = get(g:result_cache, l:hash , v:null)
-
-    " ... or if there is a cached completion nearby (10 characters behind)
-    " Looks at the previous 10 characters to see if a completion is cached. If one is found at (x,y)
-    " then it checks that the characters typed after (x,y) match up with the cached completion result.
-    if l:cached_completion == v:null
-        let l:past_text = l:prefix . l:prompt
-        for i in range(10)
-                let l:hash_txt = l:past_text[:-(2+i)] . l:suffix
-                let l:temp_hash = sha256(l:hash_txt)
-                if has_key(g:result_cache, l:temp_hash)
-                    let l:temp_cached_completion = get(g:result_cache, l:temp_hash)
-                    if  l:temp_cached_completion == ""
+    let l:hash = sha256(l:request_context)
+
+    if a:cache
+        " Check if the completion is cached
+        let l:cached_completion = get(g:result_cache, l:hash , v:null)
+
+        " ... or if there is a cached completion nearby (10 characters behind)
+        " Looks at the previous 10 characters to see if a completion is cached. If one is found at (x,y)
+        " then it checks that the characters typed after (x,y) match up with the cached completion result.
+        if l:cached_completion == v:null
+            let l:past_text = l:prefix . l:prompt
+            for i in range(10)
+                    let l:hash_txt = l:past_text[:-(2+i)] . l:suffix
+                    let l:temp_hash = sha256(l:hash_txt)
+                    if has_key(g:result_cache, l:temp_hash)
+                        let l:temp_cached_completion = get(g:result_cache, l:temp_hash)
+                        if  l:temp_cached_completion == ""
+                            break
+                        endif
+                        let l:response = json_decode(l:temp_cached_completion)
+                        if l:response['content'][0:len(l:past_text[-(1+i):])-1] !=# l:past_text[-(1+i):]
+                            break
+                        endif
+                        let l:response['content']  = l:response['content'][i+1:]
+                        let g:result_cache[l:hash] = json_encode(l:response)
+                        let l:cached_completion = g:result_cache[l:hash]
                         break
                     endif
-                    let l:response = json_decode(l:temp_cached_completion)
-                    if l:response['content'][0:len(l:past_text[-(1+i):])-1] !=# l:past_text[-(1+i):]
-                        break
-                    endif
-                    let l:response['content']  = l:response['content'][i+1:]
-                    let g:result_cache[l:hash] = json_encode(l:response)
-                    let l:cached_completion = g:result_cache[l:hash]
-                    break
-                endif
-        endfor
+            endfor
+        endif
     endif
-    
-    if l:cached_completion != v:null
-        call s:fim_on_stdout(l:hash, s:pos_x, s:pos_y, a:is_auto, 0, l:cached_completion)
+
+    if a:cache && l:cached_completion != v:null
+        call s:fim_on_stdout(l:hash, a:cache, s:pos_x, s:pos_y, a:is_auto, 0, l:cached_completion)
     else
         " send the request asynchronously
         if s:ghost_text_nvim
             let s:current_job = jobstart(l:curl_command, {
-                \ 'on_stdout': function('s:fim_on_stdout', [l:hash, s:pos_x, s:pos_y, a:is_auto]),
+                \ 'on_stdout': function('s:fim_on_stdout', [l:hash, a:cache, s:pos_x, s:pos_y, a:is_auto]),
                 \ 'on_exit':   function('s:fim_on_exit'),
                 \ 'stdout_buffered': v:true
                 \ })
         elseif s:ghost_text_vim
             let s:current_job = job_start(l:curl_command, {
-                \ 'out_cb':    function('s:fim_on_stdout', [l:hash, s:pos_x, s:pos_y, a:is_auto]),
+                \ 'out_cb':    function('s:fim_on_stdout', [l:hash, a:cache, s:pos_x, s:pos_y, a:is_auto]),
                 \ 'exit_cb':   function('s:fim_on_exit')
                 \ })
         endif
@@ -588,11 +590,13 @@ function! s:insert_cache(key, value)
 endfunction
 
 " callback that processes the FIM result from the server and displays the suggestion
-function! s:fim_on_stdout(hash, pos_x, pos_y, is_auto, job_id, data, event = v:null)
+function! s:fim_on_stdout(hash, cache, pos_x, pos_y, is_auto, job_id, data, event = v:null)
     " Retrieve the FIM result from cache
-    if has_key(g:result_cache, a:hash)
+    if a:cache && has_key(g:result_cache, a:hash)
+        echom "cache hit"
         let l:raw = get(g:result_cache, a:hash)
     else
+        echom "cache miss"
         if s:ghost_text_nvim
             let l:raw = join(a:data, "\n")
         elseif s:ghost_text_vim

From c5181addccc42a27aaa4bf547bc2a0d79f916c0c Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Fri, 3 Jan 2025 18:37:18 -0600
Subject: [PATCH 5/5] Remove debug echom calls

---
 autoload/llama.vim | 2 --
 1 file changed, 2 deletions(-)

diff --git a/autoload/llama.vim b/autoload/llama.vim
index 2c2bd47..acd067d 100644
--- a/autoload/llama.vim
+++ b/autoload/llama.vim
@@ -593,10 +593,8 @@ endfunction
 function! s:fim_on_stdout(hash, cache, pos_x, pos_y, is_auto, job_id, data, event = v:null)
     " Retrieve the FIM result from cache
     if a:cache && has_key(g:result_cache, a:hash)
-        echom "cache hit"
         let l:raw = get(g:result_cache, a:hash)
     else
-        echom "cache miss"
         if s:ghost_text_nvim
             let l:raw = join(a:data, "\n")
         elseif s:ghost_text_vim