smallcloudai · arcticoder · Apr 25, 2025 · Apr 26, 2025 · Apr 26, 2025
diff --git a/refact-agent/engine/Cargo.toml b/refact-agent/engine/Cargo.toml
@@ -59,7 +59,6 @@ rust-embed = "8.5.0"
 percent-encoding = "2.3"
 serde = { version = "1", features = ["rc", "derive"] }
 serde_cbor = "0.11.2"
-serde-inline-default = "0.2.3"
 serde_json = { version = "1", features = ["preserve_order"] }
 serde_yaml = "0.9.31"
 # all features = ["compression", "docs", "event_log", "failpoints", "io_uring", "lock_free_delays", "measure_allocs", "miri_optimizations", "mutex", "no_inline", "no_logs", "pretty_backtrace", "testing"]

diff --git a/refact-agent/engine/bring_your_own_key/hf.yaml b/refact-agent/engine/bring_your_own_key/hf.yaml
diff --git a/refact-agent/engine/bring_your_own_key/mixed.yaml b/refact-agent/engine/bring_your_own_key/mixed.yaml
diff --git a/refact-agent/engine/bring_your_own_key/openai.yaml b/refact-agent/engine/bring_your_own_key/openai.yaml
diff --git a/refact-agent/engine/bring_your_own_key/openrouter.yaml b/refact-agent/engine/bring_your_own_key/openrouter.yaml
diff --git a/refact-agent/engine/bring_your_own_key/refact_self_hosting.yaml b/refact-agent/engine/bring_your_own_key/refact_self_hosting.yaml
diff --git a/refact-agent/engine/python_binding_and_cmdline/refact/cli_main.py b/refact-agent/engine/python_binding_and_cmdline/refact/cli_main.py
@@ -54,9 +54,9 @@ async def answer_question_in_arguments(settings, arg_question):
 async def welcome_message(settings: cli_settings.CmdlineArgs, tip: str):
     text = f"""
 ~/.cache/refact/cli.yaml                -- set up this program
-~/.cache/refact/bring-your-own-key.yaml -- set up models you want to use
-~/.cache/refact/integrations.d/*        -- set up github, jira, make, gdb, and other tools, including which actions require confirmation
-~/.cache/refact/privacy.yaml            -- which files should never leave your computer
+~/.config/refact/providers.d/*.yaml      -- set up model providers you want to use
+~/.config/refact/integrations.d/*        -- set up github, jira, make, gdb, and other tools, including which actions require confirmation
+~/.config/refact/privacy.yaml            -- which files should never leave your computer
 Project: {settings.project_path}
 To exit, type 'exit' or Ctrl+D. {tip}.
 """
@@ -345,8 +345,8 @@ async def actual_chat(
     app = Application(key_bindings=kb, layout=layout)
     app.editing_mode = cli_settings.cli_yaml.get_editing_mode()
 
-    if cli_settings.args.model not in caps.code_chat_models:
-        known_models = list(caps.code_chat_models.keys())
+    if cli_settings.args.model not in caps.chat_models:
+        known_models = list(caps.chat_models.keys())
         print(f"model {cli_settings.args.model} is unknown, pick one of {known_models}")
         return
 

diff --git a/refact-agent/engine/python_binding_and_cmdline/refact/cli_settings.py b/refact-agent/engine/python_binding_and_cmdline/refact/cli_settings.py
@@ -8,15 +8,12 @@
 
 class CapsModel(BaseModel):
     n_ctx: int
-    similar_models: List[str]
     supports_tools: bool
 
 
 class Caps(BaseModel):
-    cloud_name: str
-    code_chat_models: Dict[str, CapsModel]
-    code_chat_default_model: str
-    embedding_model: str
+    chat_models: Dict[str, CapsModel]  # model name -> per-model capabilities (n_ctx, supports_tools)
+    chat_default_model: str  # model name used when the caller does not pick one
 
 
 class SettingsCLI(BaseModel):
@@ -40,9 +37,7 @@ def get_editing_mode(self):
 
 
 default_config = """
-# The caps file is bring-your-own-key.yaml by default, that in turn works with OPENAI_API_KEY inside by default.
-# But you can change it to:
-#address_url: Refact
+address_url: Refact
 #api_key: <take-from-website>
 #address_url: http://your-self-hosting-server/
 #api_key: your-secret-key
@@ -66,14 +61,14 @@ def get_editing_mode(self):
 class CmdlineArgs:
     def __init__(self, caps: Caps, *, model: str, path_to_project: str, always_pause: bool, chat_id: str, chat_remote: bool):
         self.caps = caps
-        self.model = model or caps.code_chat_default_model
+        self.model = model or caps.chat_default_model  # a falsy model (empty/None) falls back to the caps default
         self.project_path = path_to_project
         self.always_pause = always_pause
         self.chat_id = chat_id
         self.chat_remote = chat_remote
 
     def n_ctx(self):
-        return self.caps.code_chat_models[self.model].n_ctx
+        return self.caps.chat_models[self.model].n_ctx  # KeyError if self.model is not a key of caps.chat_models
 
 
 args: Optional[CmdlineArgs] = None

diff --git a/refact-agent/engine/python_binding_and_cmdline/refact/cli_streaming.py b/refact-agent/engine/python_binding_and_cmdline/refact/cli_streaming.py
@@ -109,7 +109,7 @@ def process_streaming_data(data: Dict[str, Any], deltas_collector: Optional[chat
                 assert deltas_collector.choices[0].tool_calls is not None
                 streaming_toolcall = list(deltas_collector.choices[0].tool_calls)
                 update_entertainment_box()
-        finish_reason = choices[0]['finish_reason']
+        finish_reason = choices[0].get('finish_reason')
         if finish_reason == "stop":
             print_response("\n")
         if finish_reason == "tool_calls":

diff --git a/refact-agent/engine/src/agentic/compress_trajectory.rs b/refact-agent/engine/src/agentic/compress_trajectory.rs
@@ -89,16 +89,15 @@ pub async fn compress_trajectory(
     if messages.is_empty() {
         return Err("The provided chat is empty".to_string());
     }
-    let (model_name, n_ctx) = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await {
+    let (model_id, n_ctx) = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await {
         Ok(caps) => {
-            let caps_locked = caps.read().unwrap();
-            let model_name = caps_locked.code_chat_default_model.clone();
-            if let Some(model_rec) = caps_locked.code_completion_models.get(&strip_model_from_finetune(&model_name)) {
-                Ok((model_name, model_rec.n_ctx))
+            let model_id = caps.defaults.chat_default_model.clone();
+            if let Some(model_rec) = caps.completion_models.get(&strip_model_from_finetune(&model_id)) {
+                Ok((model_id, model_rec.base.n_ctx))
             } else {
                 Err(format!(
-                    "Model '{}' not found. Server has these models: {:?}",
-                    model_name, caps_locked.code_completion_models.keys()
+                    "Model '{}' not found, server has these models: {:?}",
+                    model_id, caps.completion_models.keys()
                 ))
             }
         },
@@ -120,12 +119,12 @@ pub async fn compress_trajectory(
         messages_compress.clone(),
         "".to_string(),
         false,
-        model_name.clone(),
+        model_id.clone(),
     ).await));
     let tools = gather_used_tools(&messages);
     let new_messages = subchat_single(
         ccx.clone(),
-        model_name.as_str(),
+        &model_id,
         messages_compress,
         Some(tools),
         None,

diff --git a/refact-agent/engine/src/agentic/generate_commit_message.rs b/refact-agent/engine/src/agentic/generate_commit_message.rs
@@ -265,11 +265,8 @@ pub async fn generate_commit_message_by_diff(
             },
         ]
     };
-    let model_name = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await {
-        Ok(caps) => caps
-            .read()
-            .map(|x| Ok(x.code_chat_default_model.clone()))
-            .map_err(|_| "Caps are not available".to_string())?,
+    let model_id = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await {
+        Ok(caps) => Ok(caps.defaults.chat_default_model.clone()),
         Err(_) => Err("No caps available".to_string()),
     }?;
     let ccx: Arc<AMutex<AtCommandsContext>> = Arc::new(AMutex::new(AtCommandsContext::new(
@@ -280,11 +277,11 @@ pub async fn generate_commit_message_by_diff(
         messages.clone(),
         "".to_string(),
         false,
-        model_name.clone(),
+        model_id.clone(),
     ).await));
     let new_messages = subchat_single(
         ccx.clone(),
-        model_name.as_str(),
+        &model_id,
         messages,
         Some(vec![]),
         None,

diff --git a/refact-agent/engine/src/agentic/generate_follow_up_message.rs b/refact-agent/engine/src/agentic/generate_follow_up_message.rs
@@ -74,8 +74,7 @@ fn _make_conversation(
 pub async fn generate_follow_up_message(
     messages: Vec<ChatMessage>,
     gcx: Arc<ARwLock<GlobalContext>>,
-    light_model_name: String,
-    current_model_name: &String,
+    model_id: &str,
     chat_id: &str,
 ) -> Result<FollowUpResponse, String> {
     let ccx = Arc::new(AMutex::new(AtCommandsContext::new(
@@ -86,11 +85,11 @@ pub async fn generate_follow_up_message(
         messages.clone(),
         chat_id.to_string(),
         false,
-        current_model_name.clone(),
+        model_id.to_string(),
     ).await));
     let updated_messages: Vec<Vec<ChatMessage>> = subchat_single(
         ccx.clone(),
-        &light_model_name,
+        model_id,
         _make_conversation(&messages),
         Some(vec![]),
         None,

diff --git a/refact-agent/engine/src/ast/chunk_utils.rs b/refact-agent/engine/src/ast/chunk_utils.rs
@@ -1,13 +1,13 @@
 use std::collections::VecDeque;
 use std::path::PathBuf;
 use std::sync::Arc;
-use std::sync::RwLock as StdRwLock;
 
 use itertools::Itertools;
 use ropey::Rope;
 use tokenizers::Tokenizer;
 
-use crate::ast::count_tokens;
+use crate::tokens::count_text_tokens;
+use crate::tokens::count_text_tokens_with_fallback;
 use crate::vecdb::vdb_structs::SplitResult;
 
 
@@ -17,9 +17,8 @@ pub fn official_text_hashing_function(s: &str) -> String {
 }
 
 
-fn split_line_if_needed(line: &str, tokenizer: Option<Arc<StdRwLock<Tokenizer>>>, tokens_limit: usize) -> Vec<String> {
+fn split_line_if_needed(line: &str, tokenizer: Option<Arc<Tokenizer>>, tokens_limit: usize) -> Vec<String> {
     if let Some(tokenizer) = tokenizer {
-        let tokenizer = tokenizer.read().unwrap();
         tokenizer.encode(line, false).map_or_else(
             |_| split_without_tokenizer(line, tokens_limit),
             |tokens| {
@@ -39,7 +38,7 @@ fn split_line_if_needed(line: &str, tokenizer: Option<Arc<StdRwLock<Tokenizer>>>
 }
 
 fn split_without_tokenizer(line: &str, tokens_limit: usize) -> Vec<String> {
-    if count_tokens(None, line) <= tokens_limit {
+    if count_text_tokens(None, line).is_ok_and(|tokens| tokens <= tokens_limit) {
         vec![line.to_string()]
     } else {
         Rope::from_str(line).chars()
@@ -54,7 +53,7 @@ pub fn get_chunks(text: &String,
                   file_path: &PathBuf,
                   symbol_path: &String,
                   top_bottom_rows: (usize, usize), // case with top comments
-                  tokenizer: Option<Arc<StdRwLock<Tokenizer>>>,
+                  tokenizer: Option<Arc<Tokenizer>>,
                   tokens_limit: usize,
                   intersection_lines: usize,
                   use_symbol_range_always: bool, // use for skeleton case
@@ -70,7 +69,7 @@ pub fn get_chunks(text: &String,
         let mut previous_start = line_idx;
         while line_idx < lines.len() {
             let line = lines[line_idx];
-            let line_tok_n = count_tokens(tokenizer.clone(), line);
+            let line_tok_n = count_text_tokens_with_fallback(tokenizer.clone(), line);
 
             if !accum.is_empty() && current_tok_n + line_tok_n > tokens_limit {
                 let current_line = accum.iter().map(|(line, _)| line).join("\n");
@@ -105,7 +104,7 @@ pub fn get_chunks(text: &String,
         current_tok_n = 0;
         while line_idx >= 0 {
             let line = lines[line_idx as usize];
-            let text_orig_tok_n = count_tokens(tokenizer.clone(), line);
+            let text_orig_tok_n = count_text_tokens_with_fallback(tokenizer.clone(), line);
             if !accum.is_empty() && current_tok_n + text_orig_tok_n > tokens_limit {
                 let current_line = accum.iter().map(|(line, _)| line).join("\n");
                 let start_line = if use_symbol_range_always { top_row as u64 } else { accum.front().unwrap().1 as u64 };
@@ -153,10 +152,10 @@ pub fn get_chunks(text: &String,
 mod tests {
     use std::path::PathBuf;
     use std::str::FromStr;
-    use std::sync::{Arc, RwLock as StdRwLock};
+    use std::sync::Arc;
 
     use crate::ast::chunk_utils::get_chunks;
-    use crate::ast::count_tokens;
+    use crate::tokens::count_text_tokens;
     // use crate::vecdb::vdb_structs::SplitResult;
 
     const DUMMY_TOKENIZER: &str = include_str!("dummy_tokenizer.json");
@@ -174,23 +173,23 @@ mod tests {
 
     #[test]
     fn dummy_tokenizer_test() {
-        let tokenizer = Arc::new(StdRwLock::new(tokenizers::Tokenizer::from_str(DUMMY_TOKENIZER).unwrap()));
-        let text_orig_tok_n = count_tokens(Some(tokenizer.clone()), PYTHON_CODE);
+        let tokenizer = Arc::new(tokenizers::Tokenizer::from_str(DUMMY_TOKENIZER).unwrap());
+        let text_orig_tok_n = count_text_tokens(Some(tokenizer.clone()), PYTHON_CODE).unwrap();
         assert_eq!(text_orig_tok_n, PYTHON_CODE.len());
     }
 
     #[test]
     fn simple_chunk_test_1_with_128_limit() {
-        let tokenizer = Arc::new(StdRwLock::new(tokenizers::Tokenizer::from_str(DUMMY_TOKENIZER).unwrap()));
-        let orig = include_str!("../caps.rs").to_string();
+        let tokenizer = Some(Arc::new(tokenizers::Tokenizer::from_str(DUMMY_TOKENIZER).unwrap()));
+        let orig = include_str!("../caps/mod.rs").to_string();
         let token_limits = [10, 50, 100, 200, 300];
         for &token_limit in &token_limits {
             let chunks = get_chunks(
                 &orig,
                 &PathBuf::from_str("/tmp/test.py").unwrap(),
                 &"".to_string(),
                 (0, 10),
-                Some(tokenizer.clone()),
+                tokenizer.clone(),
                 token_limit, 2, false);
             let mut not_present: Vec<char> = orig.chars().collect();
             let mut result = String::new();

diff --git a/refact-agent/engine/src/ast/file_splitter.rs b/refact-agent/engine/src/ast/file_splitter.rs
@@ -1,8 +1,8 @@
 use std::collections::HashMap;
 use std::sync::Arc;
 use itertools::Itertools;
+use tokenizers::Tokenizer;
 use tokio::sync::RwLock;
-use std::sync::RwLock as StdRwLock;
 use uuid::Uuid;
 
 use crate::ast::treesitter::parsers::get_ast_parser_by_filename;
@@ -30,7 +30,7 @@ impl AstBasedFileSplitter {
     pub async fn vectorization_split(
         &self,
         doc: &Document,
-        tokenizer: Option<Arc<StdRwLock<tokenizers::Tokenizer>>>,
+        tokenizer: Option<Arc<Tokenizer>>,
         gcx: Arc<RwLock<crate::global_context::GlobalContext>>,
         tokens_limit: usize,
     ) -> Result<Vec<crate::vecdb::vdb_structs::SplitResult>, String> {