diff --git a/refact-agent/engine/Cargo.toml b/refact-agent/engine/Cargo.toml
index c1a6191e2..bc16b8ca2 100644
--- a/refact-agent/engine/Cargo.toml
+++ b/refact-agent/engine/Cargo.toml
@@ -59,7 +59,6 @@ rust-embed = "8.5.0"
 percent-encoding = "2.3"
 serde = { version = "1", features = ["rc", "derive"] }
 serde_cbor = "0.11.2"
-serde-inline-default = "0.2.3"
 serde_json = { version = "1", features = ["preserve_order"] }
 serde_yaml = "0.9.31"
 # all features = ["compression", "docs", "event_log", "failpoints", "io_uring", "lock_free_delays", "measure_allocs", "miri_optimizations", "mutex", "no_inline", "no_logs", "pretty_backtrace", "testing"]
diff --git a/refact-agent/engine/bring_your_own_key/hf.yaml b/refact-agent/engine/bring_your_own_key/hf.yaml
deleted file mode 100644
index 68a85453f..000000000
--- a/refact-agent/engine/bring_your_own_key/hf.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-cloud_name: HuggingFace API
-
-completion_endpoint: "https://api-inference.huggingface.co/models/$MODEL"
-completion_endpoint_style: "hf"
-completion_model: bigcode/starcoder2-3b
-completion_apikey: "$HF_TOKEN"
-
-chat_endpoint: "https://api-inference.huggingface.co/models/$MODEL"
-chat_endpoint_style: "hf"
-chat_apikey: "$HF_TOKEN"
-chat_model: meta-llama/Llama-2-70b-chat-hf
-
-tokenizer_rewrite_path:    # because you need to agree to licensing agreement in the official repo to even download a tokenizer
-    meta-llama/Llama-2-70b-chat-hf: TheBloke/Llama-2-70B-fp16
-
-embedding_endpoint: "https://api-inference.huggingface.co/pipeline/feature-extraction/$MODEL"
-embedding_endpoint_style: "hf"
-embedding_apikey: "$HF_TOKEN"
-embedding_model: thenlper/gte-base
-embedding_size: 768
-#embedding_batch: 64
diff --git a/refact-agent/engine/bring_your_own_key/mixed.yaml b/refact-agent/engine/bring_your_own_key/mixed.yaml
deleted file mode 100644
index f28d8424c..000000000
--- a/refact-agent/engine/bring_your_own_key/mixed.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-cloud_name: Mixed API
-
-chat_endpoint: "https://openrouter.ai/api/v1/chat/completions"
-chat_apikey: "$OPENROUTER_API_KEY"
-chat_model: meta-llama/llama-3.1-8b-instruct
-
-completion_endpoint_style: "hf"
-completion_endpoint: "https://api-inference.huggingface.co/models/$MODEL"
-completion_model: bigcode/starcoder2-3b
-completion_apikey: "$HF_TOKEN"
-
-embedding_endpoint: "https://api.openai.com/v1/embeddings"
-embedding_apikey: "$OPENAI_API_KEY"
-embedding_default_model: text-embedding-3-small
-embedding_size: 1536
-
-tokenizer_rewrite_path:
-    meta-llama/llama-3.1-8b-instruct: unsloth/llama-3-8b-bnb-4bit
-
-
diff --git a/refact-agent/engine/bring_your_own_key/openai.yaml b/refact-agent/engine/bring_your_own_key/openai.yaml
deleted file mode 100644
index 9cd41f51a..000000000
--- a/refact-agent/engine/bring_your_own_key/openai.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-cloud_name: OpenAI API
-
-chat_endpoint: "https://api.openai.com/v1/chat/completions"
-chat_apikey: "sk-..."    # or use $OPENAI_API_KEY if you have it in global environment variables
-chat_model: gpt-4o-mini
-
-embedding_endpoint: "https://api.openai.com/v1/embeddings"
-embedding_apikey: "sk-..."
-embedding_model: text-embedding-3-small
-embedding_size: 1536
-
-# no code completion though :/
-
-
-running_models:
-  - gpt-4o
diff --git a/refact-agent/engine/bring_your_own_key/openrouter.yaml b/refact-agent/engine/bring_your_own_key/openrouter.yaml
deleted file mode 100644
index 912a97f38..000000000
--- a/refact-agent/engine/bring_your_own_key/openrouter.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-cloud_name: OpenRouter API
-
-chat_endpoint: "https://openrouter.ai/api/v1/chat/completions"
-chat_apikey: "$OPENROUTER_API_KEY"
-chat_model: meta-llama/llama-3.1-8b-instruct
-tokenizer_rewrite_path:
-    meta-llama/llama-3.1-8b-instruct: unsloth/llama-3-8b-bnb-4bit
-
-running_models:
-  - gpt-4o
-  - meta-llama/llama-3.1-8b-instruct
-
-# no code completion though :/
diff --git a/refact-agent/engine/bring_your_own_key/refact_self_hosting.yaml b/refact-agent/engine/bring_your_own_key/refact_self_hosting.yaml
deleted file mode 100644
index 28681d4cc..000000000
--- a/refact-agent/engine/bring_your_own_key/refact_self_hosting.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-cloud_name: Refact local self-hosting server
-
-# Under development
-#chat_endpoint: "http://localhost:8008/v1/chat/completions"
-#chat_model: "qwen2.5/coder/1.5b/instruct"
-
-embedding_endpoint: "http://localhost:8008/v1/embeddings"
-embedding_model: "thenlper/gte-base"
-
-completion_endpoint: "http://localhost:8008/v1/completions"
-completion_model: "Refact/1.6B"
diff --git a/refact-agent/engine/python_binding_and_cmdline/refact/cli_main.py b/refact-agent/engine/python_binding_and_cmdline/refact/cli_main.py
index 720a4a1a2..5ea5d435b 100644
--- a/refact-agent/engine/python_binding_and_cmdline/refact/cli_main.py
+++ b/refact-agent/engine/python_binding_and_cmdline/refact/cli_main.py
@@ -54,9 +54,9 @@ async def answer_question_in_arguments(settings, arg_question):
 async def welcome_message(settings: cli_settings.CmdlineArgs, tip: str):
     text = f"""
 ~/.cache/refact/cli.yaml -- set up this program
-~/.cache/refact/bring-your-own-key.yaml -- set up models you want to use
-~/.cache/refact/integrations.d/* -- set up github, jira, make, gdb, and other tools, including which actions require confirmation
-~/.cache/refact/privacy.yaml -- which files should never leave your computer
+~/.config/refact/providers.d/*.yaml -- set up model providers you want to use
+~/.config/refact/integrations.d/* -- set up github, jira, make, gdb, and other tools, including which actions require confirmation
+~/.config/refact/privacy.yaml -- which files should never leave your computer
 Project: {settings.project_path}
 To exit, type 'exit' or Ctrl+D. {tip}.
""" @@ -345,8 +345,8 @@ async def actual_chat( app = Application(key_bindings=kb, layout=layout) app.editing_mode = cli_settings.cli_yaml.get_editing_mode() - if cli_settings.args.model not in caps.code_chat_models: - known_models = list(caps.code_chat_models.keys()) + if cli_settings.args.model not in caps.chat_models: + known_models = list(caps.chat_models.keys()) print(f"model {cli_settings.args.model} is unknown, pick one of {known_models}") return diff --git a/refact-agent/engine/python_binding_and_cmdline/refact/cli_settings.py b/refact-agent/engine/python_binding_and_cmdline/refact/cli_settings.py index 7dcbe8c5c..71d110238 100644 --- a/refact-agent/engine/python_binding_and_cmdline/refact/cli_settings.py +++ b/refact-agent/engine/python_binding_and_cmdline/refact/cli_settings.py @@ -8,15 +8,12 @@ class CapsModel(BaseModel): n_ctx: int - similar_models: List[str] supports_tools: bool class Caps(BaseModel): - cloud_name: str - code_chat_models: Dict[str, CapsModel] - code_chat_default_model: str - embedding_model: str + chat_models: Dict[str, CapsModel] + chat_default_model: str class SettingsCLI(BaseModel): @@ -40,9 +37,7 @@ def get_editing_mode(self): default_config = """ -# The caps file is bring-your-own-key.yaml by default, that in turn works with OPENAI_API_KEY inside by default. -# But you can change it to: -#address_url: Refact +address_url: Refact #api_key: #address_url: http://your-self-hosting-server/ #api_key: your-secret-key @@ -66,14 +61,14 @@ def get_editing_mode(self): class CmdlineArgs: def __init__(self, caps: Caps, *, model: str, path_to_project: str, always_pause: bool, chat_id: str, chat_remote: bool): self.caps = caps - self.model = model or caps.code_chat_default_model + self.model = model or caps.chat_default_model self.project_path = path_to_project self.always_pause = always_pause self.chat_id = chat_id self.chat_remote = chat_remote def n_ctx(self): - return self.caps.code_chat_models[self.model].n_ctx + return self.caps.chat_models[self.model].n_ctx args: Optional[CmdlineArgs] = None diff --git a/refact-agent/engine/python_binding_and_cmdline/refact/cli_streaming.py b/refact-agent/engine/python_binding_and_cmdline/refact/cli_streaming.py index 4ca448f8c..18d0553cf 100644 --- a/refact-agent/engine/python_binding_and_cmdline/refact/cli_streaming.py +++ b/refact-agent/engine/python_binding_and_cmdline/refact/cli_streaming.py @@ -109,7 +109,7 @@ def process_streaming_data(data: Dict[str, Any], deltas_collector: Optional[chat assert deltas_collector.choices[0].tool_calls is not None streaming_toolcall = list(deltas_collector.choices[0].tool_calls) update_entertainment_box() - finish_reason = choices[0]['finish_reason'] + finish_reason = choices[0].get('finish_reason') if finish_reason == "stop": print_response("\n") if finish_reason == "tool_calls": diff --git a/refact-agent/engine/src/agentic/compress_trajectory.rs b/refact-agent/engine/src/agentic/compress_trajectory.rs index c93332589..e5dd5b93d 100644 --- a/refact-agent/engine/src/agentic/compress_trajectory.rs +++ b/refact-agent/engine/src/agentic/compress_trajectory.rs @@ -89,16 +89,15 @@ pub async fn compress_trajectory( if messages.is_empty() { return Err("The provided chat is empty".to_string()); } - let (model_name, n_ctx) = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { + let (model_id, n_ctx) = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { Ok(caps) => { - let caps_locked = caps.read().unwrap(); - let model_name = caps_locked.code_chat_default_model.clone(); - 
if let Some(model_rec) = caps_locked.code_completion_models.get(&strip_model_from_finetune(&model_name)) { - Ok((model_name, model_rec.n_ctx)) + let model_id = caps.defaults.chat_default_model.clone(); + if let Some(model_rec) = caps.completion_models.get(&strip_model_from_finetune(&model_id)) { + Ok((model_id, model_rec.base.n_ctx)) } else { Err(format!( - "Model '{}' not found. Server has these models: {:?}", - model_name, caps_locked.code_completion_models.keys() + "Model '{}' not found, server has these models: {:?}", + model_id, caps.completion_models.keys() )) } }, @@ -120,12 +119,12 @@ pub async fn compress_trajectory( messages_compress.clone(), "".to_string(), false, - model_name.clone(), + model_id.clone(), ).await)); let tools = gather_used_tools(&messages); let new_messages = subchat_single( ccx.clone(), - model_name.as_str(), + &model_id, messages_compress, Some(tools), None, diff --git a/refact-agent/engine/src/agentic/generate_commit_message.rs b/refact-agent/engine/src/agentic/generate_commit_message.rs index 5cd7062a9..bb469559f 100644 --- a/refact-agent/engine/src/agentic/generate_commit_message.rs +++ b/refact-agent/engine/src/agentic/generate_commit_message.rs @@ -265,11 +265,8 @@ pub async fn generate_commit_message_by_diff( }, ] }; - let model_name = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { - Ok(caps) => caps - .read() - .map(|x| Ok(x.code_chat_default_model.clone())) - .map_err(|_| "Caps are not available".to_string())?, + let model_id = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { + Ok(caps) => Ok(caps.defaults.chat_default_model.clone()), Err(_) => Err("No caps available".to_string()), }?; let ccx: Arc> = Arc::new(AMutex::new(AtCommandsContext::new( @@ -280,11 +277,11 @@ pub async fn generate_commit_message_by_diff( messages.clone(), "".to_string(), false, - model_name.clone(), + model_id.clone(), ).await)); let new_messages = subchat_single( ccx.clone(), - model_name.as_str(), + &model_id, messages, Some(vec![]), None, diff --git a/refact-agent/engine/src/agentic/generate_follow_up_message.rs b/refact-agent/engine/src/agentic/generate_follow_up_message.rs index fae6e0aeb..e6faca196 100644 --- a/refact-agent/engine/src/agentic/generate_follow_up_message.rs +++ b/refact-agent/engine/src/agentic/generate_follow_up_message.rs @@ -74,8 +74,7 @@ fn _make_conversation( pub async fn generate_follow_up_message( messages: Vec, gcx: Arc>, - light_model_name: String, - current_model_name: &String, + model_id: &str, chat_id: &str, ) -> Result { let ccx = Arc::new(AMutex::new(AtCommandsContext::new( @@ -86,11 +85,11 @@ pub async fn generate_follow_up_message( messages.clone(), chat_id.to_string(), false, - current_model_name.clone(), + model_id.to_string(), ).await)); let updated_messages: Vec> = subchat_single( ccx.clone(), - &light_model_name, + model_id, _make_conversation(&messages), Some(vec![]), None, diff --git a/refact-agent/engine/src/ast/chunk_utils.rs b/refact-agent/engine/src/ast/chunk_utils.rs index fcc73d84b..569880bf3 100644 --- a/refact-agent/engine/src/ast/chunk_utils.rs +++ b/refact-agent/engine/src/ast/chunk_utils.rs @@ -1,13 +1,13 @@ use std::collections::VecDeque; use std::path::PathBuf; use std::sync::Arc; -use std::sync::RwLock as StdRwLock; use itertools::Itertools; use ropey::Rope; use tokenizers::Tokenizer; -use crate::ast::count_tokens; +use crate::tokens::count_text_tokens; +use crate::tokens::count_text_tokens_with_fallback; use crate::vecdb::vdb_structs::SplitResult; @@ -17,9 +17,8 @@ pub fn 
official_text_hashing_function(s: &str) -> String { } -fn split_line_if_needed(line: &str, tokenizer: Option>>, tokens_limit: usize) -> Vec { +fn split_line_if_needed(line: &str, tokenizer: Option>, tokens_limit: usize) -> Vec { if let Some(tokenizer) = tokenizer { - let tokenizer = tokenizer.read().unwrap(); tokenizer.encode(line, false).map_or_else( |_| split_without_tokenizer(line, tokens_limit), |tokens| { @@ -39,7 +38,7 @@ fn split_line_if_needed(line: &str, tokenizer: Option>> } fn split_without_tokenizer(line: &str, tokens_limit: usize) -> Vec { - if count_tokens(None, line) <= tokens_limit { + if count_text_tokens(None, line).is_ok_and(|tokens| tokens <= tokens_limit) { vec![line.to_string()] } else { Rope::from_str(line).chars() @@ -54,7 +53,7 @@ pub fn get_chunks(text: &String, file_path: &PathBuf, symbol_path: &String, top_bottom_rows: (usize, usize), // case with top comments - tokenizer: Option>>, + tokenizer: Option>, tokens_limit: usize, intersection_lines: usize, use_symbol_range_always: bool, // use for skeleton case @@ -70,7 +69,7 @@ pub fn get_chunks(text: &String, let mut previous_start = line_idx; while line_idx < lines.len() { let line = lines[line_idx]; - let line_tok_n = count_tokens(tokenizer.clone(), line); + let line_tok_n = count_text_tokens_with_fallback(tokenizer.clone(), line); if !accum.is_empty() && current_tok_n + line_tok_n > tokens_limit { let current_line = accum.iter().map(|(line, _)| line).join("\n"); @@ -105,7 +104,7 @@ pub fn get_chunks(text: &String, current_tok_n = 0; while line_idx >= 0 { let line = lines[line_idx as usize]; - let text_orig_tok_n = count_tokens(tokenizer.clone(), line); + let text_orig_tok_n = count_text_tokens_with_fallback(tokenizer.clone(), line); if !accum.is_empty() && current_tok_n + text_orig_tok_n > tokens_limit { let current_line = accum.iter().map(|(line, _)| line).join("\n"); let start_line = if use_symbol_range_always { top_row as u64 } else { accum.front().unwrap().1 as u64 }; @@ -153,10 +152,10 @@ pub fn get_chunks(text: &String, mod tests { use std::path::PathBuf; use std::str::FromStr; - use std::sync::{Arc, RwLock as StdRwLock}; + use std::sync::Arc; use crate::ast::chunk_utils::get_chunks; - use crate::ast::count_tokens; + use crate::tokens::count_text_tokens; // use crate::vecdb::vdb_structs::SplitResult; const DUMMY_TOKENIZER: &str = include_str!("dummy_tokenizer.json"); @@ -174,15 +173,15 @@ mod tests { #[test] fn dummy_tokenizer_test() { - let tokenizer = Arc::new(StdRwLock::new(tokenizers::Tokenizer::from_str(DUMMY_TOKENIZER).unwrap())); - let text_orig_tok_n = count_tokens(Some(tokenizer.clone()), PYTHON_CODE); + let tokenizer = Arc::new(tokenizers::Tokenizer::from_str(DUMMY_TOKENIZER).unwrap()); + let text_orig_tok_n = count_text_tokens(Some(tokenizer.clone()), PYTHON_CODE).unwrap(); assert_eq!(text_orig_tok_n, PYTHON_CODE.len()); } #[test] fn simple_chunk_test_1_with_128_limit() { - let tokenizer = Arc::new(StdRwLock::new(tokenizers::Tokenizer::from_str(DUMMY_TOKENIZER).unwrap())); - let orig = include_str!("../caps.rs").to_string(); + let tokenizer = Some(Arc::new(tokenizers::Tokenizer::from_str(DUMMY_TOKENIZER).unwrap())); + let orig = include_str!("../caps/mod.rs").to_string(); let token_limits = [10, 50, 100, 200, 300]; for &token_limit in &token_limits { let chunks = get_chunks( @@ -190,7 +189,7 @@ mod tests { &PathBuf::from_str("/tmp/test.py").unwrap(), &"".to_string(), (0, 10), - Some(tokenizer.clone()), + tokenizer.clone(), token_limit, 2, false); let mut not_present: Vec = 
orig.chars().collect(); let mut result = String::new(); diff --git a/refact-agent/engine/src/ast/file_splitter.rs b/refact-agent/engine/src/ast/file_splitter.rs index 044edccf9..ab5e28a44 100644 --- a/refact-agent/engine/src/ast/file_splitter.rs +++ b/refact-agent/engine/src/ast/file_splitter.rs @@ -1,8 +1,8 @@ use std::collections::HashMap; use std::sync::Arc; use itertools::Itertools; +use tokenizers::Tokenizer; use tokio::sync::RwLock; -use std::sync::RwLock as StdRwLock; use uuid::Uuid; use crate::ast::treesitter::parsers::get_ast_parser_by_filename; @@ -30,7 +30,7 @@ impl AstBasedFileSplitter { pub async fn vectorization_split( &self, doc: &Document, - tokenizer: Option>>, + tokenizer: Option>, gcx: Arc>, tokens_limit: usize, ) -> Result, String> { diff --git a/refact-agent/engine/src/ast/mod.rs b/refact-agent/engine/src/ast/mod.rs index 852c51be1..e00095ad4 100644 --- a/refact-agent/engine/src/ast/mod.rs +++ b/refact-agent/engine/src/ast/mod.rs @@ -1,7 +1,5 @@ use std::collections::HashMap; use std::sync::Arc; -#[cfg(feature="vecdb")] -use std::sync::RwLock as StdRwLock; use std::cell::RefCell; use uuid::Uuid; use crate::files_in_workspace::Document; @@ -23,27 +21,6 @@ pub mod chunk_utils; pub mod parse_python; pub mod parse_common; - -#[cfg(feature="vecdb")] -pub fn count_tokens( - tokenizer: Option>>, - text: &str, -) -> usize { - if let Some(tokenizer) = tokenizer { - let tokenizer_locked = tokenizer.write().unwrap(); - let tokens = match tokenizer_locked.encode(text, false) { - Ok(tokens) => tokens, - Err(err) => { - tracing::warn!("Encoding error: {}", err); - return 0; - } - }; - tokens.len() - } else { - 1 + text.len() / 3 - } -} - pub fn lowlevel_file_markup( doc: &Document, symbols: &Vec, diff --git a/refact-agent/engine/src/at_commands/at_search.rs b/refact-agent/engine/src/at_commands/at_search.rs index 067d6faf0..6f576b69e 100644 --- a/refact-agent/engine/src/at_commands/at_search.rs +++ b/refact-agent/engine/src/at_commands/at_search.rs @@ -7,7 +7,6 @@ use crate::nicer_logs::last_n_chars; use crate::at_commands::execute_at::AtCommandMember; use crate::call_validation::{ContextEnum, ContextFile}; -use crate::caps::get_custom_embedding_api_key; use crate::vecdb; use crate::vecdb::vdb_structs::VecdbSearch; @@ -69,18 +68,12 @@ pub async fn execute_at_search( (ccx_locked.global_context.clone(), ccx_locked.top_n) }; - let api_key = get_custom_embedding_api_key(gcx.clone()).await; - if let Err(err) = api_key { - return Err(err.message); - } - let api_key = api_key.unwrap(); - let vec_db = gcx.read().await.vec_db.clone(); let r = match *vec_db.lock().await { Some(ref db) => { let top_n_twice_as_big = top_n * 2; // top_n will be cut at postprocessing stage, and we really care about top_n files, not pieces // TODO: this code sucks, release lock, don't hold anything during the search - let search_result = db.vecdb_search(query.clone(), top_n_twice_as_big, vecdb_scope_filter_mb, &api_key).await?; + let search_result = db.vecdb_search(query.clone(), top_n_twice_as_big, vecdb_scope_filter_mb).await?; let results = search_result.results.clone(); return Ok(results2message(&results)); } diff --git a/refact-agent/engine/src/at_commands/execute_at.rs b/refact-agent/engine/src/at_commands/execute_at.rs index 53511d9d6..f91b9387d 100644 --- a/refact-agent/engine/src/at_commands/execute_at.rs +++ b/refact-agent/engine/src/at_commands/execute_at.rs @@ -1,4 +1,4 @@ -use std::sync::{Arc, RwLock}; +use std::sync::Arc; use tokio::sync::Mutex as AMutex; use regex::Regex; use serde_json::{json, 
Value}; @@ -20,7 +20,7 @@ pub const MIN_RAG_CONTEXT_LIMIT: usize = 256; pub async fn run_at_commands_locally( ccx: Arc>, - tokenizer: Arc>, + tokenizer: Option>, maxgen: usize, original_messages: &Vec, stream_back_to_user: &mut HasRagResults, @@ -164,7 +164,7 @@ pub async fn run_at_commands_locally( pub async fn run_at_commands_remotely( ccx: Arc>, - model_name: &str, + model_id: &str, maxgen: usize, original_messages: &Vec, stream_back_to_user: &mut HasRagResults, @@ -186,7 +186,7 @@ pub async fn run_at_commands_remotely( maxgen, subchat_tool_parameters, postprocess_parameters, - model_name: model_name.to_string(), + model_name: model_id.to_string(), chat_id: chat_id.clone(), }; diff --git a/refact-agent/engine/src/cached_tokenizers.rs b/refact-agent/engine/src/cached_tokenizers.rs deleted file mode 100644 index 10e477396..000000000 --- a/refact-agent/engine/src/cached_tokenizers.rs +++ /dev/null @@ -1,163 +0,0 @@ -use tokio::io::AsyncWriteExt; -use std::path::Path; -use std::sync::{Arc, RwLock as StdRwLock}; -use std::time::Duration; -use tokio::sync::RwLock as ARwLock; -use tokio::sync::Mutex as AMutex; -use tokenizers::Tokenizer; -use reqwest::header::AUTHORIZATION; -use reqwest::Response; -use tracing::{error, info}; -use uuid::Uuid; - -use crate::global_context::GlobalContext; -use crate::caps::{CodeAssistantCaps, strip_model_from_finetune}; - - -async fn try_open_tokenizer( - res: Response, - to: impl AsRef, -) -> Result<(), String> { - let mut file = tokio::fs::OpenOptions::new() - .write(true) - .create(true) - .open(&to) - .await - .map_err(|e| format!("failed to open file: {}", e))?; - file.write_all(&res.bytes().await - .map_err(|e| format!("failed to fetch bytes: {}", e))? - ).await.map_err(|e| format!("failed to write to file: {}", e))?; - file.flush().await.map_err(|e| format!("failed to flush file: {}", e))?; - info!("saved tokenizer to {}", to.as_ref().display()); - Ok(()) -} - -async fn download_tokenizer_file( - http_client: &reqwest::Client, - http_path: &str, - api_token: String, - to: impl AsRef, -) -> Result<(), String> { - tokio::fs::create_dir_all( - to.as_ref().parent().ok_or_else(|| "tokenizer path has no parent")?, - ).await.map_err(|e| format!("failed to create parent dir: {}", e))?; - if to.as_ref().exists() { - return Ok(()); - } - - info!("downloading tokenizer from {}", http_path); - let mut req = http_client.get(http_path); - if api_token.to_lowercase().starts_with("hf_") { - req = req.header(AUTHORIZATION, format!("Bearer {api_token}")) - } - let res = req - .send() - .await - .map_err(|e| format!("failed to get response: {}", e))? 
-        .error_for_status()
-        .map_err(|e| format!("failed to get response: {}", e))?;
-    try_open_tokenizer(res, to).await?;
-    Ok(())
-}
-
-fn check_json_file(path: &Path) -> bool {
-    match Tokenizer::from_file(path) {
-        Ok(_) => { true }
-        Err(_) => { false }
-    }
-}
-
-async fn try_download_tokenizer_file_and_open(
-    http_client: &reqwest::Client,
-    http_path: &str,
-    api_token: String,
-    to: impl AsRef<Path>,
-) -> Result<(), String> {
-    let path = to.as_ref();
-    if path.exists() && check_json_file(path) {
-        return Ok(());
-    }
-
-    let tmp_file = std::env::temp_dir().join(Uuid::new_v4().to_string());
-    let tmp_path = tmp_file.as_path();
-
-    for i in 0..15 {
-        if i != 0 {
-            tokio::time::sleep(Duration::from_millis(200)).await;
-        }
-        let res = download_tokenizer_file(http_client, http_path, api_token.clone(), tmp_path).await;
-        if res.is_err() {
-            error!("failed to download tokenizer: {}", res.unwrap_err());
-            continue;
-        }
-
-        let parent = path.parent();
-        if parent.is_none() {
-            error!("failed to download tokenizer: parent is not set");
-            continue;
-        }
-
-        let res = tokio::fs::create_dir_all(parent.unwrap()).await;
-        if res.is_err() {
-            error!("failed to create parent dir: {}", res.unwrap_err());
-            continue;
-        }
-
-        if !check_json_file(tmp_path) {
-            error!("failed to download tokenizer: file is not a tokenizer");
-            continue;
-        }
-
-        match tokio::fs::copy(tmp_path, path).await {
-            Ok(_) => {
-                info!("moved tokenizer to {}", path.display());
-                return Ok(());
-            },
-            Err(_) => { continue; }
-        }
-    }
-    Err("failed to download tokenizer".to_string())
-}
-
-pub async fn cached_tokenizer(
-    caps: Arc<StdRwLock<CodeAssistantCaps>>,
-    global_context: Arc<ARwLock<GlobalContext>>,
-    model_name: String,
-) -> Result<Arc<StdRwLock<Tokenizer>>, String> {
-    let model_name = strip_model_from_finetune(&model_name);
-    let tokenizer_download_lock: Arc<AMutex<bool>> = global_context.read().await.tokenizer_download_lock.clone();
-    let _tokenizer_download_locked = tokenizer_download_lock.lock().await;
-
-    let (client2, cache_dir, tokenizer_arc, api_key) = {
-        let cx_locked = global_context.read().await;
-        (cx_locked.http_client.clone(), cx_locked.cache_dir.clone(), cx_locked.tokenizer_map.clone().get(&model_name).cloned(), cx_locked.cmdline.api_key.clone())
-    };
-
-    if tokenizer_arc.is_some() {
-        return Ok(tokenizer_arc.unwrap().clone())
-    }
-
-    let tokenizer_cache_dir = std::path::PathBuf::from(cache_dir).join("tokenizers");
-    tokio::fs::create_dir_all(&tokenizer_cache_dir)
-        .await
-        .expect("failed to create cache dir");
-    let to = tokenizer_cache_dir.join(model_name.clone()).join("tokenizer.json");
-    let http_path = {
-        let caps_locked = caps.read().unwrap();
-        if caps_locked.tokenizer_path_template.is_empty() {
-            caps_locked.tokenizer_rewrite_path.get(&model_name).unwrap_or(&model_name).clone()
-        } else {
-            let rewritten_model_name = caps_locked.tokenizer_rewrite_path.get(&model_name).unwrap_or(&model_name);
-            caps_locked.tokenizer_path_template.replace("$MODEL", rewritten_model_name)
-        }
-    };
-    try_download_tokenizer_file_and_open(&client2, http_path.as_str(), api_key.clone(), &to).await?;
-    info!("loading tokenizer \"{}\"", to.display());
-    let mut tokenizer = Tokenizer::from_file(to).map_err(|e| format!("failed to load tokenizer: {}", e))?;
-    let _ = tokenizer.with_truncation(None);
-    tokenizer.with_padding(None);
-    let arc = Arc::new(StdRwLock::new(tokenizer));
-
-    global_context.write().await.tokenizer_map.insert(model_name.clone(), arc.clone());
-    Ok(arc)
-}
diff --git a/refact-agent/engine/src/call_validation.rs b/refact-agent/engine/src/call_validation.rs
index eeeca8ff0..67262783b 100644
--- a/refact-agent/engine/src/call_validation.rs
+++ b/refact-agent/engine/src/call_validation.rs
@@ -63,8 +63,6 @@ pub struct CodeCompletionPost {
     #[serde(default)]
     pub model: String,
     #[serde(default)]
-    pub scratchpad: String,
-    #[serde(default)]
     pub stream: bool,
     #[serde(default)]
     pub no_cache: bool,
@@ -184,8 +182,33 @@ pub struct ChatMessage {
     pub thinking_blocks: Option<Vec<serde_json::Value>>,
 }
 
+#[derive(Debug, Serialize, Deserialize, Clone, Copy)]
+#[serde(rename_all = "lowercase")]
+pub enum ModelType {
+    Chat,
+    Completion,
+    Embedding,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ChatModelType {
+    Light,
+    Default,
+    Thinking
+}
+
+impl Default for ChatModelType {
+    fn default() -> Self {
+        ChatModelType::Default
+    }
+}
+
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct SubchatParameters {
+    #[serde(default)]
+    pub subchat_model_type: ChatModelType,
+    #[serde(default)]
     pub subchat_model: String,
     pub subchat_n_ctx: usize,
     #[serde(default)]
@@ -205,8 +228,6 @@ pub struct ChatPost {
     pub parameters: SamplingParameters,
     #[serde(default)]
     pub model: String,
-    #[serde(default)]
-    pub scratchpad: String,
     pub stream: Option<bool>,
     pub temperature: Option<f32>,
     #[serde(default)]
@@ -361,7 +382,6 @@ mod tests {
             ..Default::default()
         },
         model: "".to_string(),
-        scratchpad: "".to_string(),
         stream: false,
         no_cache: false,
         use_ast: true,
@@ -392,7 +412,6 @@ mod tests {
            ..Default::default()
         },
         model: "".to_string(),
-        scratchpad: "".to_string(),
         stream: false,
         no_cache: false,
         use_ast: true,
@@ -423,7 +442,6 @@ mod tests {
            ..Default::default()
         },
         model: "".to_string(),
-        scratchpad: "".to_string(),
         stream: false,
         no_cache: false,
         use_ast: true,
@@ -454,7 +472,6 @@ mod tests {
            ..Default::default()
         },
         model: "".to_string(),
-        scratchpad: "".to_string(),
         stream: false,
         no_cache: false,
         use_ast: true,
diff --git a/refact-agent/engine/src/caps.rs b/refact-agent/engine/src/caps.rs
deleted file mode 100644
index 28026a9e6..000000000
--- a/refact-agent/engine/src/caps.rs
+++ /dev/null
@@ -1,715 +0,0 @@
-use std::path::PathBuf;
-use std::collections::HashMap;
-use indexmap::IndexMap;
-use std::fs::File;
-use std::io::Read;
-use std::sync::Arc;
-use std::sync::RwLock as StdRwLock;
-use serde::Deserialize;
-use serde::Serialize;
-use serde_json::Value;
-use tokio::sync::RwLock as ARwLock;
-use url::Url;
-use tracing::{error, info, warn};
-
-use crate::custom_error::ScratchError;
-use crate::global_context::{try_load_caps_quickly_if_not_present, GlobalContext};
-use crate::known_models::KNOWN_MODELS;
-
-
-const CAPS_FILENAME: &str = "refact-caps";
-const CAPS_FILENAME_FALLBACK: &str = "coding_assistant_caps.json";
-
-
-#[derive(Debug, Serialize, Deserialize, Clone, Default)]
-pub struct ModelRecord {
-    #[serde(default)]
-    pub n_ctx: usize,
-    #[serde(default)]
-    pub supports_scratchpads: HashMap<String, Value>,
-    #[serde(default)]
-    pub default_scratchpad: String,
-    #[serde(default)]
-    pub similar_models: Vec<String>,
-    #[serde(default)]
-    pub supports_tools: bool,
-    #[serde(default)]
-    pub supports_multimodality: bool,
-    #[serde(default)]
-    pub supports_clicks: bool,
-    #[serde(default)]
-    pub supports_agent: bool,
-    #[serde(default)]
-    pub supports_reasoning: Option<String>,
-    #[serde(default)]
-    pub supports_boost_reasoning: bool,
-    #[serde(default)]
-    pub default_temperature: Option<f32>,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, Default)]
-pub struct EmbeddingModelRecord {
-    #[serde(default)]
-    pub n_ctx: usize,
-    #[serde(default)]
-    pub size: i32,
-}
-
-#[derive(Debug, Deserialize)]
-pub struct ModelsOnly {
-    pub code_completion_models: IndexMap<String, ModelRecord>,
-    pub code_chat_models: IndexMap<String, ModelRecord>,
-    pub tokenizer_rewrite_path: HashMap<String, String>,
-}
-
-fn default_tokenizer_path_template() -> String {
-    String::from("https://huggingface.co/$MODEL/resolve/main/tokenizer.json")
-}
-
-fn default_telemetry_basic_dest() -> String {
-    String::from("https://www.smallcloud.ai/v1/telemetry-basic")
-}
-
-fn default_telemetry_basic_retrieve_my_own() -> String {
-    String::from("https://www.smallcloud.ai/v1/telemetry-retrieve-my-own-stats")
-}
-
-fn default_endpoint_style() -> String {
-    String::from("openai")
-}
-
-fn default_code_completion_n_ctx() -> usize {
-    2048
-}
-
-fn default_endpoint_embeddings_style() -> String {
-    String::from("openai")
-}
-
-fn default_support_metadata() -> bool { false }
-
-#[derive(Debug, Serialize, Deserialize, Clone, Default)]
-pub struct CodeAssistantCaps {
-    pub cloud_name: String,
-
-    #[serde(default = "default_endpoint_style")]
-    pub endpoint_style: String,
-    #[serde(default)]
-    pub chat_endpoint_style: String,
-    #[serde(default = "default_endpoint_style")]
-    pub completion_endpoint_style: String,
-
-    #[serde(default)]
-    pub endpoint_template: String,
-    #[serde(default)]
-    pub completion_endpoint: String,
-    #[serde(default)]
-    pub chat_endpoint: String,
-
-    // default api key is in the command line
-    #[serde(default)]
-    pub completion_apikey: String,
-    #[serde(default)]
-    pub chat_apikey: String,
-    #[serde(default)]
-    pub embedding_apikey: String,
-
-    #[serde(default)]
-    pub endpoint_chat_passthrough: String,
-    #[serde(default = "default_tokenizer_path_template")]
-    pub tokenizer_path_template: String,
-    #[serde(default)]
-    pub tokenizer_rewrite_path: HashMap<String, String>,
-    #[serde(default = "default_telemetry_basic_dest")]
-    pub telemetry_basic_dest: String,
-    #[serde(default = "default_telemetry_basic_retrieve_my_own")]
-    pub telemetry_basic_retrieve_my_own: String,
-    #[serde(default)]
-    pub code_completion_models: IndexMap<String, ModelRecord>,
-    #[serde(default)]
-    #[serde(alias = "completion_model")]
-    pub code_completion_default_model: String,
-    #[serde(default)]
-    #[serde(alias = "multiline_completion_model")]
-    pub multiline_code_completion_default_model: String,
-    #[serde(default = "default_code_completion_n_ctx")]
-    #[serde(alias = "completion_n_ctx")]
-    pub code_completion_n_ctx: usize,
-    #[serde(default)]
-    pub code_chat_models: IndexMap<String, ModelRecord>,
-    #[serde(default)]
-    #[serde(alias = "chat_model")]
-    pub code_chat_default_model: String,
-    #[serde(default)]
-    pub models_dict_patch: HashMap<String, ModelRecord>,
-    #[serde(default)]
-    #[serde(alias = "default_embeddings_model")]
-    pub embedding_model: String,
-    #[serde(default)]
-    #[serde(alias = "embedding_endpoint")]
-    pub endpoint_embeddings_template: String,
-    #[serde(default = "default_endpoint_embeddings_style")]
-    #[serde(alias = "embedding_endpoint_style")]
-    pub endpoint_embeddings_style: String,
-    #[serde(default)]
-    #[serde(alias = "size_embeddings")]
-    pub embedding_size: i32,
-    #[serde(default)]
-    pub embedding_batch: usize,
-    #[serde(default)]
-    pub embedding_n_ctx: usize,
-    #[serde(default)]
-    pub running_models: Vec<String>,  // check there if a model is available or not, not in other places
-    #[serde(default)]
-    pub caps_version: i64,  // need to reload if it increases on server, that happens when server configuration changes
-    #[serde(default)]
-    pub code_chat_default_system_prompt: String,
-
-    #[serde(default)]
-    pub customization: String,  // on self-hosting server, allows to customize yaml_configs & friends for all engineers
-
-    #[serde(default = "default_support_metadata")]
-    pub support_metadata: bool,
-}
-
-#[derive(Debug, Deserialize, Clone, Default)]
-pub struct CodeAssistantCapsCompletion {
-    pub endpoint: String,
-    pub models: IndexMap<String, ModelRecord>,
-    pub default_model: String,
-    pub default_multiline_model: String,
-}
-
-#[derive(Debug, Deserialize, Clone, Default)]
-pub struct CodeAssistantCapsChat {
-    pub endpoint: String,
-    pub models: IndexMap<String, ModelRecord>,
-    pub default_model: String,
-}
-
-#[derive(Debug, Deserialize, Clone, Default)]
-pub struct CodeAssistantCapsEmbedding {
-    pub endpoint: String,
-    pub models: IndexMap<String, EmbeddingModelRecord>,
-    pub default_model: String,
-}
-
-#[derive(Debug, Deserialize, Clone, Default)]
-pub struct CodeAssistantCapsTelemetryEndpoints {
-    pub telemetry_basic_endpoint: String,
-    pub telemetry_corrected_snippets_endpoint: String,
-    pub telemetry_basic_retrieve_my_own_endpoint: String,
-}
-
-#[derive(Debug, Deserialize, Clone, Default)]
-pub struct CodeAssistantCapsV2 {
-    pub cloud_name: String,
-
-    pub completion: CodeAssistantCapsCompletion,
-    pub chat: CodeAssistantCapsChat,
-    pub embedding: CodeAssistantCapsEmbedding,
-
-    pub telemetry_endpoints: CodeAssistantCapsTelemetryEndpoints,
-    pub tokenizer_endpoints: HashMap<String, String>,
-
-    #[serde(default)]
-    pub customization: String,
-    #[serde(default)]
-    pub default_system_prompt: String,
-
-    pub caps_version: i64,
-}
-
-fn load_caps_from_buf(
-    buffer: &String,
-    caps_url: &String,
-) -> Result<Arc<StdRwLock<CodeAssistantCaps>>, String> {
-    let mut r1_mb_error_text = "".to_string();
-
-    let r1_mb: Option<CodeAssistantCaps> = match serde_json::from_str(&buffer) {
-        Ok(v) => v,
-        Err(e) => {
-            // incorrect json
-            if buffer.trim_start().starts_with(&['{', '[']) {
-                r1_mb_error_text = format!("{}", e);
-                None
-            } else {
-                match serde_yaml::from_str(&buffer) {
-                    Ok(v) => v,
-                    Err(e) => {
-                        r1_mb_error_text = format!("{}", e);
-                        None
-                    }
-                }
-            }
-        }
-    };
-    let mut r1 = r1_mb.ok_or(format!("failed to parse caps: {}", r1_mb_error_text))?;
-
-    let r0: ModelsOnly = serde_json::from_str(&KNOWN_MODELS).map_err(|e| {
-        let up_to_line = KNOWN_MODELS.lines().take(e.line()).collect::<Vec<&str>>().join("\n");
-        error!("{}\nfailed to parse KNOWN_MODELS: {}", up_to_line, e);
-        format!("failed to parse KNOWN_MODELS: {}", e)
-    })?;
-
-    if !r1.code_chat_default_model.is_empty() && !r1.running_models.contains(&r1.code_chat_default_model) {
-        r1.running_models.push(r1.code_chat_default_model.clone());
-    }
-    if !r1.code_completion_default_model.is_empty() && !r1.running_models.contains(&r1.code_completion_default_model) {
-        r1.running_models.push(r1.code_completion_default_model.clone());
-    }
-    if !r1.multiline_code_completion_default_model.is_empty() && !r1.running_models.contains(&r1.multiline_code_completion_default_model) {
-        r1.running_models.push(r1.multiline_code_completion_default_model.clone());
-    }
-    if !r1.embedding_model.is_empty() && !r1.running_models.contains(&r1.embedding_model) {
-        r1.running_models.push(r1.embedding_model.clone());
-    }
-
-    _inherit_r1_from_r0(&mut r1, &r0);
-    apply_models_dict_patch(&mut r1);
-    r1.endpoint_template = relative_to_full_url(&caps_url, &r1.endpoint_template)?;
-    r1.endpoint_chat_passthrough = relative_to_full_url(&caps_url, &r1.endpoint_chat_passthrough)?;
-    if r1.endpoint_chat_passthrough.is_empty() {
-        r1.endpoint_chat_passthrough = relative_to_full_url(&caps_url, &r1.chat_endpoint)?;
-    }
-    r1.telemetry_basic_dest = relative_to_full_url(&caps_url, &r1.telemetry_basic_dest)?;
-    r1.telemetry_basic_retrieve_my_own = relative_to_full_url(&caps_url, &r1.telemetry_basic_retrieve_my_own)?;
-    r1.endpoint_embeddings_template = relative_to_full_url(&caps_url, &r1.endpoint_embeddings_template)?;
-    r1.tokenizer_path_template = relative_to_full_url(&caps_url, &r1.tokenizer_path_template)?;
-    if r1.embedding_n_ctx == 0 {
-        r1.embedding_n_ctx = 512;
-    }
-
-    // info!("caps {} completion models", r1.code_completion_models.len());
-    // info!("caps default completion model: \"{}\"", r1.code_completion_default_model);
-    // info!("caps {} chat models", r1.code_chat_models.len());
-    // info!("caps default chat model: \"{}\"", r1.code_chat_default_model);
-    // info!("running models: {:?}", r1.running_models);
-    // info!("code_chat_models models: {:?}", r1.code_chat_models);
-    // info!("code completion models: {:?}", r1.code_completion_models);
-    Ok(Arc::new(StdRwLock::new(r1)))
-}
-
-fn load_caps_from_buf_v2(
-    buffer: &String,
-    caps_url: &String,
-) -> Result<Arc<StdRwLock<CodeAssistantCaps>>, String> {
-    // Try to parse as V2 format
-    let caps_v2: CodeAssistantCapsV2 = match serde_json::from_str(buffer) {
-        Ok(v) => v,
-        Err(_) => return Err("failed to load in v2 format".to_string()),
-    };
-
-    // Convert V2 to V1 format
-    let mut caps = CodeAssistantCaps {
-        cloud_name: caps_v2.cloud_name,
-        endpoint_style: "openai".to_string(),
-        chat_endpoint_style: "openai".to_string(),
-        completion_endpoint_style: "openai".to_string(),
-        endpoint_embeddings_style: "openai".to_string(),
-
-        // Completion related fields
-        completion_endpoint: relative_to_full_url(&caps_url, &caps_v2.completion.endpoint)?,
-        code_completion_models: caps_v2.completion.models.clone(),
-        code_completion_default_model: caps_v2.completion.default_model.clone(),
-        multiline_code_completion_default_model: caps_v2.completion.default_multiline_model.clone(),
-
-        // Chat related fields
-        chat_endpoint: relative_to_full_url(&caps_url, &caps_v2.completion.endpoint)?,  // for completion-based chat
-        endpoint_chat_passthrough: relative_to_full_url(&caps_url, &caps_v2.chat.endpoint)?,
-        code_chat_models: caps_v2.chat.models.clone(),
-        code_chat_default_model: caps_v2.chat.default_model.clone(),
-
-        // Embeddings related fields
-        endpoint_embeddings_template: relative_to_full_url(&caps_url, &caps_v2.embedding.endpoint)?,
-        embedding_model: caps_v2.embedding.default_model.clone(),
-        embedding_n_ctx: caps_v2.embedding.models.get(&caps_v2.embedding.default_model).cloned().unwrap_or_default().n_ctx,
-        embedding_size: caps_v2.embedding.models.get(&caps_v2.embedding.default_model).cloned().unwrap_or_default().size,
-
-        // Telemetry endpoints
-        telemetry_basic_dest: relative_to_full_url(&caps_url, &caps_v2.telemetry_endpoints.telemetry_basic_endpoint)?,
-        telemetry_basic_retrieve_my_own: relative_to_full_url(&caps_url, &caps_v2.telemetry_endpoints.telemetry_basic_retrieve_my_own_endpoint)?,
-
-        tokenizer_path_template: "".to_string(),
-        tokenizer_rewrite_path: {
-            let mut rewritten_paths = HashMap::new();
-            for (key, endpoint) in caps_v2.tokenizer_endpoints {
-                let full_url = relative_to_full_url(&caps_url, &endpoint)?;
-                rewritten_paths.insert(key, full_url);
-            }
-            rewritten_paths
-        },
-
-        // Version
-        caps_version: caps_v2.caps_version,
-
-        // Collect all models from completion and chat sections
-        running_models: {
-            let mut models = std::collections::HashSet::new();
-            models.extend(caps_v2.completion.models.keys().cloned());
-            models.extend(caps_v2.chat.models.keys().cloned());
-            // models.extend(caps_v2.embedding.models.keys().cloned());
-            models.into_iter().collect()
-        },
-
-        customization: caps_v2.customization.clone(),
-        code_chat_default_system_prompt: caps_v2.default_system_prompt.clone(),
-
-        ..Default::default()
-    };
-
-    // Convert relative URLs to absolute URLs
-    caps.endpoint_embeddings_template = relative_to_full_url(&caps_url, &caps.endpoint_embeddings_template)?;
-    caps.chat_endpoint = relative_to_full_url(&caps_url, &caps.chat_endpoint)?;
-    caps.telemetry_basic_dest = relative_to_full_url(&caps_url, &caps.telemetry_basic_dest)?;
-    caps.telemetry_basic_retrieve_my_own = relative_to_full_url(&caps_url, &caps.telemetry_basic_retrieve_my_own)?;
-
-    // Set default embedding context size if not set
-    if caps.embedding_n_ctx == 0 {
-        caps.embedding_n_ctx = 512;
-    }
-
-    Ok(Arc::new(StdRwLock::new(caps)))
-}
-
-macro_rules! get_api_key_macro {
-    ($gcx:expr, $caps:expr, $field:ident) => {{
-        let cx_locked = $gcx.read().await;
-        let custom_apikey = $caps.read().unwrap().$field.clone();
-        if custom_apikey.is_empty() {
-            cx_locked.cmdline.api_key.clone()
-        } else if custom_apikey.starts_with("$") {
-            let env_var_name = &custom_apikey[1..];
-            match std::env::var(env_var_name) {
-                Ok(env_value) => env_value,
-                Err(e) => {
-                    error!("tried to read API key from env var {}, but failed: {}\nTry editing ~/.config/refact/bring-your-own-key.yaml", env_var_name, e);
-                    cx_locked.cmdline.api_key.clone()
-                }
-            }
-        } else {
-            custom_apikey
-        }
-    }};
-}
-
-pub async fn get_api_key(
-    gcx: Arc<ARwLock<GlobalContext>>,
-    use_this_fall_back_to_default_if_empty: String,
-) -> String {
-    let gcx_locked = gcx.write().await;
-    if use_this_fall_back_to_default_if_empty.is_empty() {
-        gcx_locked.cmdline.api_key.clone()
-    } else if use_this_fall_back_to_default_if_empty.starts_with("$") {
-        let env_var_name = &use_this_fall_back_to_default_if_empty[1..];
-        match std::env::var(env_var_name) {
-            Ok(env_value) => env_value,
-            Err(e) => {
-                error!("tried to read API key from env var {}, but failed: {}\nTry editing ~/.config/refact/bring-your-own-key.yaml", env_var_name, e);
-                gcx_locked.cmdline.api_key.clone()
-            }
-        }
-    } else {
-        use_this_fall_back_to_default_if_empty
-    }
-}
-
-#[allow(dead_code)]
-async fn get_custom_chat_api_key(gcx: Arc<ARwLock<GlobalContext>>) -> Result<String, ScratchError> {
-    let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?;
-    Ok(get_api_key_macro!(gcx, caps, chat_apikey))
-}
-
-#[cfg(feature="vecdb")]
-pub async fn get_custom_embedding_api_key(gcx: Arc<ARwLock<GlobalContext>>) -> Result<String, ScratchError> {
-    let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?;
-    Ok(get_api_key_macro!(gcx, caps, embedding_apikey))
-}
-
-#[allow(dead_code)]
-async fn get_custom_completion_api_key(gcx: Arc<ARwLock<GlobalContext>>) -> Result<String, ScratchError> {
-    let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?;
-    Ok(get_api_key_macro!(gcx, caps, completion_apikey))
-}
-
-
-async fn load_caps_buf_from_file(
-    cmdline: crate::global_context::CommandLine,
-    gcx: Arc<ARwLock<GlobalContext>>,
-) -> Result<(String, String), String> {
-    let mut caps_url = cmdline.address_url.clone();
-    if caps_url.is_empty() {
-        let config_dir = {
-            let gcx_locked = gcx.read().await;
-            gcx_locked.config_dir.clone()
-        };
-        let caps_path = PathBuf::from(config_dir).join("bring-your-own-key.yaml");
-        caps_url = caps_path.to_string_lossy().into_owned();
-        // info!("will use {} as the caps file", caps_url);
-    }
-    let mut buffer = String::new();
-    let mut file = File::open(caps_url.clone()).map_err(|_| format!("failed to open file '{}'", caps_url))?;
-    file.read_to_string(&mut buffer).map_err(|_| format!("failed to read file '{}'", caps_url))?;
-    Ok((buffer, caps_url))
-}
-
-async fn load_caps_buf_from_url(
-    cmdline: crate::global_context::CommandLine,
-    gcx: Arc<ARwLock<GlobalContext>>,
-) -> Result<(String, String), String> {
-    let mut buffer = String::new();
-    let mut caps_urls: Vec<String> = Vec::new();
-    if cmdline.address_url.to_lowercase() == "refact" {
-        caps_urls.push("https://inference.smallcloud.ai/coding_assistant_caps.json".to_string());
-    } else {
-        let base_url = Url::parse(&cmdline.address_url.clone()).map_err(|_| "failed to parse address url (1)".to_string())?;
-        let joined_url = base_url.join(&CAPS_FILENAME).map_err(|_| "failed to parse address url (2)".to_string())?;
-        let joined_url_fallback = base_url.join(&CAPS_FILENAME_FALLBACK).map_err(|_| "failed to parse address url (2)".to_string())?;
-        caps_urls.push(joined_url.to_string());
-        caps_urls.push(joined_url_fallback.to_string());
-    }
-
-    let http_client = gcx.read().await.http_client.clone();
-    let api_key = cmdline.api_key.clone();
-    let mut headers = reqwest::header::HeaderMap::new();
-    if !api_key.is_empty() {
-        headers.insert(reqwest::header::AUTHORIZATION, reqwest::header::HeaderValue::from_str(format!("Bearer {}", api_key).as_str()).unwrap());
-        headers.insert(reqwest::header::USER_AGENT, reqwest::header::HeaderValue::from_str(format!("refact-lsp {}", crate::version::build_info::PKG_VERSION).as_str()).unwrap());
-    }
-
-    let mut status: u16 = 0;
-    for url in caps_urls.iter() {
-        info!("fetching caps from {}", url);
-        let response = http_client.get(url).headers(headers.clone()).send().await.map_err(|e| format!("{}", e))?;
-        status = response.status().as_u16();
-        buffer = match response.text().await {
-            Ok(v) => v,
-            Err(_) => continue
-        };
-
-        if status == 200 {
-            break;
-        }
-
-        warn!("status={}; server responded with:\n{}", status, buffer);
-    }
-    if status != 200 {
-        let response_json: serde_json::Result<Value> = serde_json::from_str(&buffer);
-        return if let Ok(response_json) = response_json {
-            if let Some(detail) = response_json.get("detail") {
-                Err(detail.as_str().unwrap().to_string())
-            } else {
-                Err(format!("cannot fetch caps, status={}", status))
-            }
-        } else {
-            Err(format!("cannot fetch caps, status={}", status))
-        };
-    }
-
-    let caps_url: String = match caps_urls.get(0) {
-        Some(u) => u.clone(),
-        None => return Err("caps_url is none".to_string())
-    };
-
-    Ok((buffer, caps_url))
-}
-
-pub async fn load_caps(
-    cmdline: crate::global_context::CommandLine,
-    gcx: Arc<ARwLock<GlobalContext>>,
-) -> Result<Arc<StdRwLock<CodeAssistantCaps>>, String> {
-    let mut caps_url = cmdline.address_url.clone();
-    let buf: String;
-    if caps_url.to_lowercase() == "refact" || caps_url.starts_with("http") {
-        (buf, caps_url) = load_caps_buf_from_url(cmdline, gcx).await?
-    } else {
-        (buf, caps_url) = load_caps_buf_from_file(cmdline, gcx).await?
-    }
-    match load_caps_from_buf_v2(&buf, &caps_url) {
-        Ok(caps) => Ok(caps),
-        Err(e) => {
-            info!("Cannot load v2 caps: `{}`, try old format", e);
-            load_caps_from_buf(&buf, &caps_url)
-        }
-    }
-}
-
-pub fn strip_model_from_finetune(model: &String) -> String {
-    model.split(":").next().unwrap().to_string()
-}
-
-fn relative_to_full_url(
-    caps_url: &String,
-    maybe_relative_url: &str,
-) -> Result<String, String> {
-    if maybe_relative_url.starts_with("http") {
-        Ok(maybe_relative_url.to_string())
-    } else if maybe_relative_url.is_empty() {
-        Ok("".to_string())
-    } else {
-        let base_url = Url::parse(caps_url.as_str()).map_err(|_| "failed to parse address url (3)".to_string())?;
-        let joined_url = base_url.join(maybe_relative_url).map_err(|_| "failed to join URL \"{}\" and possibly relative \"{}\"".to_string())?;
-        Ok(joined_url.to_string())
-    }
-}
-
-fn apply_models_dict_patch(caps: &mut CodeAssistantCaps) {
-    fn apply_model_record_patch(rec: &mut ModelRecord, rec_patched: &ModelRecord) {
-        if rec_patched.n_ctx != 0 {
-            rec.n_ctx = rec_patched.n_ctx;
-        }
-        if rec_patched.supports_tools {
-            rec.supports_tools = rec_patched.supports_tools;
-        }
-        if rec_patched.supports_multimodality {
-            rec.supports_multimodality = rec_patched.supports_multimodality;
-        }
-        if rec_patched.supports_tools {
-            rec.supports_tools = rec_patched.supports_tools;
-        }
-    }
-
-    for (model, rec_patched) in caps.models_dict_patch.iter() {
-        if let Some(rec) = caps.code_completion_models.get_mut(model) {
-            apply_model_record_patch(rec, rec_patched);
-        }
-        if let Some(rec) = caps.code_chat_models.get_mut(model) {
-            apply_model_record_patch(rec, rec_patched);
-        }
-    }
-}
-
-fn _inherit_r1_from_r0(
-    r1: &mut CodeAssistantCaps,
-    r0: &ModelsOnly,
-) {
-    // XXX: only patches running models, patch all?
-    for k in r1.running_models.iter() {
-        let k_stripped = strip_model_from_finetune(k);
-
-        for (rec_name, rec) in r0.code_completion_models.iter() {
-            if rec_name == &k_stripped || rec.similar_models.contains(&k_stripped) {
-                r1.code_completion_models.insert(k.to_string(), rec.clone());
-            }
-        }
-
-        for (rec_name, rec) in r0.code_chat_models.iter() {
-            if rec_name == &k_stripped || rec.similar_models.contains(&k_stripped) {
-                r1.code_chat_models.insert(k.to_string(), rec.clone());
-            }
-        }
-    }
-
-    for k in r1.running_models.iter() {
-        if !r1.code_completion_models.contains_key(k) && !r1.code_chat_models.contains_key(k) && *k != r1.embedding_model {
-            warn!("indicated as running, unknown model {:?}, maybe update this rust binary", k);
-        }
-    }
-
-    for k in r0.tokenizer_rewrite_path.keys() {
-        if !r1.tokenizer_rewrite_path.contains_key(k) {
-            r1.tokenizer_rewrite_path.insert(k.to_string(), r0.tokenizer_rewrite_path[k].clone());
-        }
-    }
-}
-
-pub fn which_model_to_use<'a>(
-    models: &'a IndexMap<String, ModelRecord>,
-    user_wants_model: &str,
-    default_model: &str,
-) -> Result<(String, &'a ModelRecord), String> {
-    let mut take_this_one = default_model;
-    if user_wants_model != "" {
-        take_this_one = user_wants_model;
-    }
-    let no_finetune = strip_model_from_finetune(&take_this_one.to_string());
-    if let Some(model_rec) = models.get(&take_this_one.to_string()) {
-        Ok((take_this_one.to_string(), model_rec))
-    } else if let Some(model_rec) = models.get(&no_finetune) {
-        Ok((take_this_one.to_string(), model_rec))
-    } else {
-        Err(format!(
-            "Model '{}' not found. Server has these models: {:?}",
-            take_this_one,
-            models.keys()
-        ))
-    }
-}
-
-pub fn which_scratchpad_to_use<'a>(
-    scratchpads: &'a HashMap<String, serde_json::Value>,
-    user_wants_scratchpad: &str,
-    default_scratchpad: &str,
-) -> Result<(String, &'a serde_json::Value), String> {
-    let mut take_this_one = default_scratchpad;
-    if user_wants_scratchpad != "" {
-        take_this_one = user_wants_scratchpad;
-    }
-    if default_scratchpad == "" {
-        if scratchpads.len() == 1 {
-            let key = scratchpads.keys().next().unwrap();
-            return Ok((key.clone(), &scratchpads[key]));
-        } else {
-            return Err(format!(
-                "There is no default scratchpad defined, requested scratchpad is empty. The model supports these scratchpads: {:?}",
-                scratchpads.keys()
-            ));
-        }
-    }
-    if let Some(scratchpad_patch) = scratchpads.get(take_this_one) {
-        return Ok((take_this_one.to_string(), scratchpad_patch));
-    } else {
-        return Err(format!(
-            "Scratchpad '{}' not found. The model supports these scratchpads: {:?}",
-            take_this_one,
-            scratchpads.keys()
-        ));
-    }
-}
-
-pub async fn get_model_record(
-    gcx: Arc<ARwLock<GlobalContext>>,
-    model: &str,
-) -> Result<ModelRecord, String> {
-    let caps = crate::global_context::try_load_caps_quickly_if_not_present(
-        gcx.clone(), 0,
-    ).await.map_err(|e| {
-        warn!("no caps: {:?}", e);
-        format!("failed to load caps: {}", e)
-    })?;
-
-    let caps_lock = caps.read().unwrap();
-    match caps_lock.code_chat_models.get(model) {
-        Some(res) => Ok(res.clone()),
-        None => Err(format!("no model record for model `{}`", model))
-    }
-}
-
-
-pub const BRING_YOUR_OWN_KEY_SAMPLE: &str = r#"
-cloud_name: My own mix of clouds!
-
-chat_endpoint: "https://api.openai.com/v1/chat/completions"
-chat_apikey: "$OPENAI_API_KEY"    # Will work if you have it in global environment variables, but better use the real sk-... key
-chat_model: gpt-4o-mini
-
-embedding_endpoint: "https://api.openai.com/v1/embeddings"
-embedding_apikey: "$OPENAI_API_KEY"
-embedding_model: text-embedding-3-small
-embedding_size: 1536
-
-# completion_endpoint: "https://api-inference.huggingface.co/models/$MODEL"
-# completion_endpoint_style: "hf"
-# completion_apikey: "hf_..."    # or use $HF_TOKEN if you have it in global environment variables
-# completion_model: bigcode/starcoder2-3b
-
-running_models:    # all models mentioned in *_model are automatically running, but you can add more
-  - gpt-4o-mini
-  - gpt-4o
-
-# More examples https://github.com/smallcloudai/refact-lsp/tree/dev/bring_your_own_key
-
-# Refact sends basic telemetry (counters and errors), you can send it to a different address (a Refact self-hosting server is especially useful) or set to an empty string for no telemetry.
-# telemetry_basic_dest: # default: https://www.smallcloud.ai/v1/telemetry-basic -# telemetry_basic_retrieve_my_own: # default: https://www.smallcloud.ai/v1/telemetry-retrieve-my-own-stats -"#; \ No newline at end of file diff --git a/refact-agent/engine/src/caps/caps.rs b/refact-agent/engine/src/caps/caps.rs new file mode 100644 index 000000000..208178079 --- /dev/null +++ b/refact-agent/engine/src/caps/caps.rs @@ -0,0 +1,432 @@ +use std::sync::Arc; + +use indexmap::IndexMap; +use serde::Deserialize; +use serde::Serialize; +use tokio::sync::RwLock as ARwLock; +use url::Url; +use tracing::{info, warn}; + +use crate::custom_error::MapErrToString; +use crate::global_context::CommandLine; +use crate::global_context::GlobalContext; +use crate::caps::providers::{add_models_to_caps, read_providers_d, resolve_provider_api_key, + post_process_provider, CapsProvider}; +use crate::caps::self_hosted::SelfHostedCaps; + +pub const CAPS_FILENAME: &str = "refact-caps"; +pub const CAPS_FILENAME_FALLBACK: &str = "coding_assistant_caps.json"; + +#[derive(Debug, Serialize, Clone, Deserialize, Default, PartialEq)] +pub struct BaseModelRecord { + #[serde(default)] + pub n_ctx: usize, + + /// Actual model name, e.g. "gpt-4o" + #[serde(default)] + pub name: String, + /// provider/model_name, e.g. "openai/gpt-4o" + #[serde(skip_deserializing)] + pub id: String, + + #[serde(default, skip_serializing)] + pub endpoint: String, + #[serde(default, skip_serializing)] + pub endpoint_style: String, + #[serde(default, skip_serializing)] + pub api_key: String, + #[serde(default, skip_serializing)] + pub tokenizer_api_key: String, + + #[serde(default, skip_serializing)] + pub support_metadata: bool, + #[serde(default, skip_serializing)] + pub similar_models: Vec, + #[serde(default)] + pub tokenizer: String, + + #[serde(default = "default_true")] + pub enabled: bool, + // Fields used for Config/UI management + #[serde(skip_deserializing)] + pub removable: bool, + #[serde(skip_deserializing)] + pub user_configured: bool, +} + +fn default_true() -> bool { true } + +pub trait HasBaseModelRecord { + fn base(&self) -> &BaseModelRecord; + fn base_mut(&mut self) -> &mut BaseModelRecord; +} + +#[derive(Debug, Serialize, Clone, Deserialize, Default)] +pub struct ChatModelRecord { + #[serde(flatten)] + pub base: BaseModelRecord, + + #[serde(default = "default_chat_scratchpad", skip_serializing)] + pub scratchpad: String, + #[serde(default, skip_serializing)] + pub scratchpad_patch: serde_json::Value, + + #[serde(default)] + pub supports_tools: bool, + #[serde(default)] + pub supports_multimodality: bool, + #[serde(default)] + pub supports_clicks: bool, + #[serde(default)] + pub supports_agent: bool, + #[serde(default)] + pub supports_reasoning: Option, + #[serde(default)] + pub supports_boost_reasoning: bool, + #[serde(default)] + pub default_temperature: Option, +} + +pub fn default_chat_scratchpad() -> String { "PASSTHROUGH".to_string() } + +impl HasBaseModelRecord for ChatModelRecord { + fn base(&self) -> &BaseModelRecord { &self.base } + fn base_mut(&mut self) -> &mut BaseModelRecord { &mut self.base } +} + +#[derive(Debug, Serialize, Clone, Deserialize, Default)] +pub struct CompletionModelRecord { + #[serde(flatten)] + pub base: BaseModelRecord, + + #[serde(default = "default_completion_scratchpad")] + pub scratchpad: String, + #[serde(default = "default_completion_scratchpad_patch")] + pub scratchpad_patch: serde_json::Value, + + pub model_family: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, 
Eq)] +pub enum CompletionModelFamily { + #[serde(rename = "qwen2.5-coder-base")] + Qwen2_5CoderBase, + #[serde(rename = "starcoder")] + Starcoder, + #[serde(rename = "deepseek-coder")] + DeepseekCoder, +} + +impl CompletionModelFamily { + pub fn to_string(self) -> String { + serde_json::to_value(self).ok() + .and_then(|v| v.as_str().map(|s| s.to_string())).unwrap_or_default() + } + + pub fn all_variants() -> Vec { + vec![ + CompletionModelFamily::Qwen2_5CoderBase, + CompletionModelFamily::Starcoder, + CompletionModelFamily::DeepseekCoder, + ] + } +} + +pub fn default_completion_scratchpad() -> String { "REPLACE_PASSTHROUGH".to_string() } + +pub fn default_completion_scratchpad_patch() -> serde_json::Value { serde_json::json!({ + "context_format": "chat", + "rag_ratio": 0.5 +}) } + +impl HasBaseModelRecord for CompletionModelRecord { + fn base(&self) -> &BaseModelRecord { &self.base } + fn base_mut(&mut self) -> &mut BaseModelRecord { &mut self.base } +} + +#[derive(Debug, Serialize, Clone, Default, PartialEq)] +pub struct EmbeddingModelRecord { + #[serde(flatten)] + pub base: BaseModelRecord, + + pub embedding_size: i32, + pub rejection_threshold: f32, + pub embedding_batch: usize, +} + +pub fn default_rejection_threshold() -> f32 { 0.63 } + +pub fn default_embedding_batch() -> usize { 64 } + +impl HasBaseModelRecord for EmbeddingModelRecord { + fn base(&self) -> &BaseModelRecord { &self.base } + fn base_mut(&mut self) -> &mut BaseModelRecord { &mut self.base } +} + +impl EmbeddingModelRecord { + pub fn is_configured(&self) -> bool { + !self.base.name.is_empty() && (self.embedding_size > 0 || self.embedding_batch > 0 || self.base.n_ctx > 0) + } +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +pub struct CodeAssistantCaps { + #[serde(deserialize_with = "normalize_string")] + pub cloud_name: String, // "refact" or "refact_self_hosted" + + #[serde(default = "default_telemetry_basic_dest")] + pub telemetry_basic_dest: String, + #[serde(default = "default_telemetry_retrieve_my_own")] + pub telemetry_basic_retrieve_my_own: String, + + #[serde(skip_deserializing)] + pub completion_models: IndexMap>, // keys are "provider/model" + #[serde(skip_deserializing)] + pub chat_models: IndexMap>, + #[serde(skip_deserializing)] + pub embedding_model: EmbeddingModelRecord, + + #[serde(flatten, skip_deserializing)] + pub defaults: DefaultModels, + + #[serde(default)] + pub caps_version: i64, // need to reload if it increases on server, that happens when server configuration changes + + #[serde(default)] + pub customization: String, // on self-hosting server, allows to customize yaml_configs & friends for all engineers + + #[serde(default = "default_hf_tokenizer_template")] + pub hf_tokenizer_template: String, // template for HuggingFace tokenizer URLs +} + +fn default_telemetry_retrieve_my_own() -> String { + "https://www.smallcloud.ai/v1/telemetry-retrieve-my-own-stats".to_string() +} + +pub fn default_hf_tokenizer_template() -> String { + "https://huggingface.co/$HF_MODEL/resolve/main/tokenizer.json".to_string() +} + +fn default_telemetry_basic_dest() -> String { + "https://www.smallcloud.ai/v1/telemetry-basic".to_string() +} + +pub fn normalize_string<'de, D: serde::Deserializer<'de>>(deserializer: D) -> Result { + let s: String = String::deserialize(deserializer)?; + Ok(s.chars().map(|c| if c.is_alphanumeric() { c.to_ascii_lowercase() } else { '_' }).collect()) +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +pub struct DefaultModels { + #[serde(default, alias = 
"code_completion_default_model", alias = "completion_model")] + pub completion_default_model: String, + #[serde(default, alias = "code_chat_default_model", alias = "chat_model")] + pub chat_default_model: String, + #[serde(default)] + pub chat_thinking_model: String, + #[serde(default)] + pub chat_light_model: String, +} + +impl DefaultModels { + pub fn apply_override(&mut self, other: &DefaultModels, provider_name: Option<&str>) { + if !other.completion_default_model.is_empty() { + self.completion_default_model = match provider_name { + Some(provider) => format!("{}/{}", provider, other.completion_default_model), + None => other.completion_default_model.clone(), + }; + } + if !other.chat_default_model.is_empty() { + self.chat_default_model = match provider_name { + Some(provider) => format!("{}/{}", provider, other.chat_default_model), + None => other.chat_default_model.clone(), + }; + } + if !other.chat_thinking_model.is_empty() { + self.chat_thinking_model = match provider_name { + Some(provider) => format!("{}/{}", provider, other.chat_thinking_model), + None => other.chat_thinking_model.clone(), + }; + } + if !other.chat_light_model.is_empty() { + self.chat_light_model = match provider_name { + Some(provider) => format!("{}/{}", provider, other.chat_light_model), + None => other.chat_light_model.clone(), + }; + } + } +} + +pub async fn load_caps_value_from_url( + cmdline: CommandLine, + gcx: Arc>, +) -> Result<(serde_json::Value, String), String> { + let caps_urls = if cmdline.address_url.to_lowercase() == "refact" { + vec!["https://inference.smallcloud.ai/coding_assistant_caps.json".to_string()] + } else { + let base_url = Url::parse(&cmdline.address_url) + .map_err(|_| "failed to parse address url".to_string())?; + + vec![ + base_url.join(&CAPS_FILENAME).map_err(|_| "failed to join caps URL".to_string())?.to_string(), + base_url.join(&CAPS_FILENAME_FALLBACK).map_err(|_| "failed to join fallback caps URL".to_string())?.to_string(), + ] + }; + + let http_client = gcx.read().await.http_client.clone(); + let mut headers = reqwest::header::HeaderMap::new(); + + if !cmdline.api_key.is_empty() { + headers.insert(reqwest::header::AUTHORIZATION, reqwest::header::HeaderValue::from_str(&format!("Bearer {}", cmdline.api_key)).unwrap()); + headers.insert(reqwest::header::USER_AGENT, reqwest::header::HeaderValue::from_str(&format!("refact-lsp {}", crate::version::build_info::PKG_VERSION)).unwrap()); + } + + let mut last_status = 0; + let mut last_response_json: Option = None; + + for url in &caps_urls { + info!("fetching caps from {}", url); + let response = http_client.get(url) + .headers(headers.clone()) + .send() + .await + .map_err(|e| e.to_string())?; + + last_status = response.status().as_u16(); + + if let Ok(json_value) = response.json::().await { + if last_status == 200 { + return Ok((json_value, url.clone())); + } + last_response_json = Some(json_value.clone()); + warn!("status={}; server responded with:\n{}", last_status, json_value); + } + } + + if let Some(json_value) = last_response_json { + if let Some(detail) = json_value.get("detail").and_then(|d| d.as_str()) { + return Err(detail.to_string()); + } + } + + Err(format!("cannot fetch caps, status={}", last_status)) +} + +pub async fn load_caps( + cmdline: crate::global_context::CommandLine, + gcx: Arc>, +) -> Result, String> { + let (config_dir, cmdline_api_key) = { + let gcx_locked = gcx.read().await; + (gcx_locked.config_dir.clone(), gcx_locked.cmdline.api_key.clone()) + }; + + let (caps_value, caps_url) = 
+pub async fn load_caps_value_from_url(
+    cmdline: CommandLine,
+    gcx: Arc<ARwLock<GlobalContext>>,
+) -> Result<(serde_json::Value, String), String> {
+    let caps_urls = if cmdline.address_url.to_lowercase() == "refact" {
+        vec!["https://inference.smallcloud.ai/coding_assistant_caps.json".to_string()]
+    } else {
+        let base_url = Url::parse(&cmdline.address_url)
+            .map_err(|_| "failed to parse address url".to_string())?;
+
+        vec![
+            base_url.join(&CAPS_FILENAME).map_err(|_| "failed to join caps URL".to_string())?.to_string(),
+            base_url.join(&CAPS_FILENAME_FALLBACK).map_err(|_| "failed to join fallback caps URL".to_string())?.to_string(),
+        ]
+    };
+
+    let http_client = gcx.read().await.http_client.clone();
+    let mut headers = reqwest::header::HeaderMap::new();
+
+    if !cmdline.api_key.is_empty() {
+        headers.insert(reqwest::header::AUTHORIZATION, reqwest::header::HeaderValue::from_str(&format!("Bearer {}", cmdline.api_key)).unwrap());
+        headers.insert(reqwest::header::USER_AGENT, reqwest::header::HeaderValue::from_str(&format!("refact-lsp {}", crate::version::build_info::PKG_VERSION)).unwrap());
+    }
+
+    let mut last_status = 0;
+    let mut last_response_json: Option<serde_json::Value> = None;
+
+    for url in &caps_urls {
+        info!("fetching caps from {}", url);
+        let response = http_client.get(url)
+            .headers(headers.clone())
+            .send()
+            .await
+            .map_err(|e| e.to_string())?;
+
+        last_status = response.status().as_u16();
+
+        if let Ok(json_value) = response.json::<serde_json::Value>().await {
+            if last_status == 200 {
+                return Ok((json_value, url.clone()));
+            }
+            last_response_json = Some(json_value.clone());
+            warn!("status={}; server responded with:\n{}", last_status, json_value);
+        }
+    }
+
+    if let Some(json_value) = last_response_json {
+        if let Some(detail) = json_value.get("detail").and_then(|d| d.as_str()) {
+            return Err(detail.to_string());
+        }
+    }
+
+    Err(format!("cannot fetch caps, status={}", last_status))
+}
+
+pub async fn load_caps(
+    cmdline: crate::global_context::CommandLine,
+    gcx: Arc<ARwLock<GlobalContext>>,
+) -> Result<Arc<CodeAssistantCaps>, String> {
+    let (config_dir, cmdline_api_key) = {
+        let gcx_locked = gcx.read().await;
+        (gcx_locked.config_dir.clone(), gcx_locked.cmdline.api_key.clone())
+    };
+
+    let (caps_value, caps_url) = load_caps_value_from_url(cmdline, gcx).await?;
+
+    let (mut caps, server_providers) = match serde_json::from_value::<SelfHostedCaps>(caps_value.clone()) {
+        Ok(self_hosted_caps) => (self_hosted_caps.into_caps(&caps_url, &cmdline_api_key)?, Vec::new()),
+        Err(_) => {
+            let caps = serde_json::from_value::<CodeAssistantCaps>(caps_value.clone())
+                .map_err_with_prefix("Failed to parse caps:")?;
+            let mut server_provider = serde_json::from_value::<CapsProvider>(caps_value)
+                .map_err_with_prefix("Failed to parse caps provider:")?;
+            resolve_relative_urls(&mut server_provider, &caps_url)?;
+            (caps, vec![server_provider])
+        }
+    };
+
+    caps.telemetry_basic_dest = relative_to_full_url(&caps_url, &caps.telemetry_basic_dest)?;
+    caps.telemetry_basic_retrieve_my_own = relative_to_full_url(&caps_url, &caps.telemetry_basic_retrieve_my_own)?;
+
+    let (mut providers, error_log) = read_providers_d(server_providers, &config_dir).await;
+    providers.retain(|p| p.enabled);
+    for e in error_log {
+        tracing::error!("{e}");
+    }
+    for provider in &mut providers {
+        post_process_provider(provider, false);
+        provider.api_key = resolve_provider_api_key(&provider, &cmdline_api_key);
+    }
+    add_models_to_caps(&mut caps, providers);
+
+    Ok(Arc::new(caps))
+}
+
+pub fn resolve_relative_urls(provider: &mut CapsProvider, caps_url: &str) -> Result<(), String> {
+    provider.chat_endpoint = relative_to_full_url(caps_url, &provider.chat_endpoint)?;
+    provider.completion_endpoint = relative_to_full_url(caps_url, &provider.completion_endpoint)?;
+    provider.embedding_endpoint = relative_to_full_url(caps_url, &provider.embedding_endpoint)?;
+    Ok(())
+}
+
+pub fn strip_model_from_finetune(model: &str) -> String {
+    model.split(":").next().unwrap().to_string()
+}
+
+pub fn relative_to_full_url(
+    caps_url: &str,
+    maybe_relative_url: &str,
+) -> Result<String, String> {
+    if maybe_relative_url.starts_with("http") {
+        Ok(maybe_relative_url.to_string())
+    } else if maybe_relative_url.is_empty() {
+        Ok("".to_string())
+    } else {
+        let base_url = Url::parse(caps_url)
+            .map_err(|_| format!("failed to parse caps url: {}", caps_url))?;
+        let joined_url = base_url.join(maybe_relative_url)
+            .map_err(|_| format!("failed to join url: {}", maybe_relative_url))?;
+        Ok(joined_url.to_string())
+    }
+}
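+// Illustrative inputs and outputs for relative_to_full_url (values assumed):
+//     relative_to_full_url("https://host/refact-caps", "v1/chat")     // Ok("https://host/v1/chat")
+//     relative_to_full_url("https://host/refact-caps", "https://x/y") // Ok("https://x/y"), absolute urls pass through
+//     relative_to_full_url("https://host/refact-caps", "")            // Ok(""), empty stays empty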
+pub fn resolve_model<'a, T>(
+    models: &'a IndexMap<String, Arc<T>>,
+    model_id: &str,
+) -> Result<Arc<T>, String> {
+    models.get(model_id).or_else(
+        || models.get(&strip_model_from_finetune(model_id))
+    ).cloned().ok_or(format!("Model '{}' not found. Server has the following models: {:?}", model_id, models.keys()))
+}
+
+pub fn resolve_chat_model<'a>(
+    caps: Arc<CodeAssistantCaps>,
+    requested_model_id: &str,
+) -> Result<Arc<ChatModelRecord>, String> {
+    let model_id = if !requested_model_id.is_empty() {
+        requested_model_id
+    } else {
+        &caps.defaults.chat_default_model
+    };
+    resolve_model(&caps.chat_models, model_id)
+}
+
+pub fn resolve_completion_model<'a>(
+    caps: Arc<CodeAssistantCaps>,
+    requested_model_id: &str,
+    try_refact_fallbacks: bool,
+) -> Result<Arc<CompletionModelRecord>, String> {
+    let model_id = if !requested_model_id.is_empty() {
+        requested_model_id
+    } else {
+        &caps.defaults.completion_default_model
+    };
+
+    match resolve_model(&caps.completion_models, model_id) {
+        Ok(model) => Ok(model),
+        Err(first_err) if try_refact_fallbacks => {
+            if let Ok(model) = resolve_model(&caps.completion_models, &format!("refact/{model_id}")) {
+                return Ok(model);
+            }
+            if let Ok(model) = resolve_model(&caps.completion_models, &format!("refact_self_hosted/{model_id}")) {
+                return Ok(model);
+            }
+            Err(first_err)
+        }
+        Err(err) => Err(err),
+    }
+}
+
+pub fn is_cloud_model(model_id: &str) -> bool {
+    model_id.starts_with("refact/")
+}
diff --git a/refact-agent/engine/src/caps/mod.rs b/refact-agent/engine/src/caps/mod.rs
new file mode 100644
index 000000000..bc3e848db
--- /dev/null
+++ b/refact-agent/engine/src/caps/mod.rs
@@ -0,0 +1,5 @@
+pub mod caps;
+pub mod providers;
+pub mod self_hosted;
+
+pub use caps::*;
diff --git a/refact-agent/engine/src/caps/providers.rs b/refact-agent/engine/src/caps/providers.rs
new file mode 100644
index 000000000..f8a2b06f8
--- /dev/null
+++ b/refact-agent/engine/src/caps/providers.rs
@@ -0,0 +1,676 @@
+use std::path::{Path, PathBuf};
+use std::sync::{Arc, OnceLock};
+
+use indexmap::IndexMap;
+use serde::{Deserialize, Serialize};
+use tokio::sync::RwLock as ARwLock;
+use structopt::StructOpt;
+
+use crate::caps::{
+    BaseModelRecord, ChatModelRecord, CodeAssistantCaps, CompletionModelRecord, DefaultModels,
+    EmbeddingModelRecord, HasBaseModelRecord, default_embedding_batch, default_rejection_threshold,
+    load_caps_value_from_url, resolve_relative_urls, strip_model_from_finetune, normalize_string
+};
+use crate::custom_error::{MapErrToString, YamlError};
+use crate::global_context::{CommandLine, GlobalContext};
+use crate::caps::self_hosted::SelfHostedCaps;
+
+#[derive(Debug, Serialize, Deserialize, Clone, Default)]
+pub struct CapsProvider {
+    #[serde(alias = "cloud_name", default, deserialize_with = "normalize_string")]
+    pub name: String,
+    #[serde(default = "default_true")]
+    pub enabled: bool,
+    #[serde(default = "default_true")]
+    pub supports_completion: bool,
+
+    #[serde(default = "default_endpoint_style")]
+    pub endpoint_style: String,
+
+    // These aliases are for backward compatibility with cloud and self-hosted caps
+    #[serde(default, alias = "endpoint_template")]
+    pub completion_endpoint: String,
+    #[serde(default, alias = "endpoint_chat_passthrough")]
+    pub chat_endpoint: String,
+    #[serde(default, alias = "endpoint_embeddings_template")]
+    pub embedding_endpoint: String,
+
+    #[serde(default)]
+    pub api_key: String,
+
+    #[serde(default)]
+    pub tokenizer_api_key: String,
+
+    #[serde(default)]
+    pub code_completion_n_ctx: usize,
+
+    #[serde(default)]
+    pub support_metadata: bool,
+
+    #[serde(default)]
+    pub completion_models: IndexMap<String, CompletionModelRecord>,
+    #[serde(default)]
+    pub chat_models: IndexMap<String, ChatModelRecord>,
+    #[serde(default, alias = "default_embeddings_model")]
+    pub embedding_model: EmbeddingModelRecord,
+
+    #[serde(default)]
+    pub models_dict_patch: IndexMap<String, serde_json::Value>,  // Used to patch some params from cloud, like n_ctx for pro/free users
+
+    #[serde(flatten)]
+    pub defaults: DefaultModels,
+
+    #[serde(default)]
+    pub running_models: Vec<String>,
+}
+
+impl CapsProvider {
+    pub fn apply_override(&mut self, value: serde_yaml::Value) -> Result<(), String> {
+        set_field_if_exists::<bool>(&mut self.enabled, "enabled", &value)?;
+        set_field_if_exists::<String>(&mut self.endpoint_style, "endpoint_style", &value)?;
+        set_field_if_exists::<String>(&mut self.completion_endpoint, "completion_endpoint", &value)?;
+        set_field_if_exists::<String>(&mut self.chat_endpoint, "chat_endpoint", &value)?;
+        set_field_if_exists::<String>(&mut self.embedding_endpoint, "embedding_endpoint", &value)?;
+        set_field_if_exists::<String>(&mut self.api_key, "api_key", &value)?;
+        set_field_if_exists::<String>(&mut self.tokenizer_api_key, "tokenizer_api_key", &value)?;
+        set_field_if_exists::<EmbeddingModelRecord>(&mut self.embedding_model, "embedding_model", &value)?;
+        if value.get("embedding_model").is_some() {
+            self.embedding_model.base.removable = true;
+            self.embedding_model.base.user_configured = true;
+        }
+
+        extend_model_collection::<ChatModelRecord>(&mut self.chat_models, "chat_models", &value, &self.running_models)?;
+        extend_model_collection::<CompletionModelRecord>(&mut self.completion_models, "completion_models", &value, &self.running_models)?;
+        extend_collection::<Vec<String>>(&mut self.running_models, "running_models", &value)?;
+
+        match serde_yaml::from_value::<DefaultModels>(value) {
+            Ok(default_models) => {
+                self.defaults.apply_override(&default_models, None);
+            },
+            Err(e) => return Err(e.to_string()),
+        }
+
+        Ok(())
+    }
+}
+
+fn set_field_if_exists<T: for<'de> serde::Deserialize<'de>>(
+    target: &mut T, field: &str, value: &serde_yaml::Value
+) -> Result<(), String> {
+    if let Some(val) = value.get(field) {
+        *target = serde_yaml::from_value(val.clone())
+            .map_err(|_| format!("Field '{}' has incorrect type", field))?;
+    }
+    Ok(())
+}
+
+fn extend_collection<C: for<'de> serde::Deserialize<'de> + IntoIterator + Extend<C::Item>>(
+    target: &mut C, field: &str, value: &serde_yaml::Value
+) -> Result<(), String> {
+    if let Some(value) = value.get(field) {
+        let imported_collection = serde_yaml::from_value::<C>(value.clone())
+            .map_err(|_| format!("Invalid format for {field}"))?;
+
+        target.extend(imported_collection);
+    }
+    Ok(())
+}
+
+// Special implementation for ChatModelRecord and CompletionModelRecord collections
+// that sets removable=true for newly added models
+fn extend_model_collection<T: for<'de> serde::Deserialize<'de> + HasBaseModelRecord>(
+    target: &mut IndexMap<String, T>, field: &str, value: &serde_yaml::Value, prev_running_models: &Vec<String>
+) -> Result<(), String> {
+    if let Some(value) = value.get(field) {
+        let imported_collection = serde_yaml::from_value::<IndexMap<String, T>>(value.clone())
+            .map_err(|_| format!("Invalid format for {field}"))?;
+
+        for (key, mut model) in imported_collection {
+            model.base_mut().user_configured = true;
+            if !target.contains_key(&key) && !prev_running_models.contains(&key) {
+                model.base_mut().removable = true;
+            }
+            target.insert(key, model);
+        }
+    }
+    Ok(())
+}
+
+fn default_endpoint_style() -> String { "openai".to_string() }
+
+fn default_true() -> bool { true }
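+// A minimal override sketch, assuming the "openai" template and a hypothetical user
+// file; only keys present in the yaml replace template fields, and any chat_models /
+// completion_models entries that are new get removable=true and user_configured=true:
+//     let user_yaml: serde_yaml::Value = serde_yaml::from_str(r#"
+//         api_key: "$OPENAI_API_KEY"
+//         chat_default_model: gpt-4o
+//     "#).unwrap();
+//     provider.apply_override(user_yaml)?;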
"default_embedding_batch")] + embedding_batch: usize, + } + + match Input::deserialize(deserializer)? { + Input::String(name) => Ok(EmbeddingModelRecord { + base: BaseModelRecord { name, ..Default::default() }, + ..Default::default() + }), + Input::Full(mut helper) => { + if helper.embedding_batch > 256 { + tracing::warn!("embedding_batch can't be higher than 256"); + helper.embedding_batch = default_embedding_batch(); + } + + Ok(EmbeddingModelRecord { + base: helper.base, + embedding_batch: helper.embedding_batch, + rejection_threshold: helper.rejection_threshold, + embedding_size: helper.embedding_size, + }) + }, + } + } +} + +#[derive(Deserialize, Default, Debug)] +pub struct ModelDefaultSettingsUI { + #[serde(default)] + pub chat: ChatModelRecord, + #[serde(default)] + pub completion: CompletionModelRecord, + #[serde(default)] + pub embedding: EmbeddingModelRecord, +} + +const PROVIDER_TEMPLATES: &[(&str, &str)] = &[ + ("anthropic", include_str!("../yaml_configs/default_providers/anthropic.yaml")), + ("custom", include_str!("../yaml_configs/default_providers/custom.yaml")), + ("deepseek", include_str!("../yaml_configs/default_providers/deepseek.yaml")), + ("google_gemini", include_str!("../yaml_configs/default_providers/google_gemini.yaml")), + ("groq", include_str!("../yaml_configs/default_providers/groq.yaml")), + ("lmstudio", include_str!("../yaml_configs/default_providers/lmstudio.yaml")), + ("ollama", include_str!("../yaml_configs/default_providers/ollama.yaml")), + ("openai", include_str!("../yaml_configs/default_providers/openai.yaml")), + ("openrouter", include_str!("../yaml_configs/default_providers/openrouter.yaml")), + ("xai", include_str!("../yaml_configs/default_providers/xai.yaml")), +]; +static PARSED_PROVIDERS: OnceLock> = OnceLock::new(); +static PARSED_MODEL_DEFAULTS: OnceLock> = OnceLock::new(); + +pub fn get_provider_templates() -> &'static IndexMap { + PARSED_PROVIDERS.get_or_init(|| { + let mut map = IndexMap::new(); + for (name, yaml) in PROVIDER_TEMPLATES { + if let Ok(mut provider) = serde_yaml::from_str::(yaml) { + provider.name = name.to_string(); + map.insert(name.to_string(), provider); + } else { + panic!("Failed to parse template for provider {}", name); + } + } + map + }) +} + +pub fn get_provider_model_default_settings_ui() -> &'static IndexMap { + PARSED_MODEL_DEFAULTS.get_or_init(|| { + let mut map = IndexMap::new(); + for (name, yaml) in PROVIDER_TEMPLATES { + let yaml_value = serde_yaml::from_str::(yaml) + .unwrap_or_else(|_| panic!("Failed to parse YAML for provider {}", name)); + + let model_default_settings_ui_value = yaml_value.get("model_default_settings_ui").cloned() + .expect(&format!("Missing `model_model_default_settings_ui` for provider template {name}")); + let model_default_settings_ui = serde_yaml::from_value(model_default_settings_ui_value) + .unwrap_or_else(|e| panic!("Failed to parse model_defaults for provider {}: {}", name, e)); + + map.insert(name.to_string(), model_default_settings_ui); + } + map + }) +} + +/// Returns yaml files from providers.d directory, and list of errors from reading +/// directory or listing files +pub async fn get_provider_yaml_paths(config_dir: &Path) -> (Vec, Vec) { + let providers_dir = config_dir.join("providers.d"); + let mut yaml_paths = Vec::new(); + let mut errors = Vec::new(); + + let mut entries = match tokio::fs::read_dir(&providers_dir).await { + Ok(entries) => entries, + Err(e) => { + errors.push(format!("Failed to read providers directory: {e}")); + return (yaml_paths, errors); + } + }; + + 
+/// Returns yaml files from the providers.d directory, and a list of errors from
+/// reading the directory or listing files
+pub async fn get_provider_yaml_paths(config_dir: &Path) -> (Vec<PathBuf>, Vec<String>) {
+    let providers_dir = config_dir.join("providers.d");
+    let mut yaml_paths = Vec::new();
+    let mut errors = Vec::new();
+
+    let mut entries = match tokio::fs::read_dir(&providers_dir).await {
+        Ok(entries) => entries,
+        Err(e) => {
+            errors.push(format!("Failed to read providers directory: {e}"));
+            return (yaml_paths, errors);
+        }
+    };
+
+    while let Some(entry_result) = entries.next_entry().await.transpose() {
+        match entry_result {
+            Ok(entry) => {
+                let path = entry.path();
+
+                if path.is_file() &&
+                    path.extension().map_or(false, |ext| ext == "yaml" || ext == "yml") {
+                    yaml_paths.push(path);
+                }
+            },
+            Err(e) => {
+                errors.push(format!("Error reading directory entry: {e}"));
+            }
+        }
+    }
+
+    (yaml_paths, errors)
+}
+
+pub fn post_process_provider(provider: &mut CapsProvider, include_disabled_models: bool) {
+    add_running_models(provider);
+    populate_model_records(provider);
+    apply_models_dict_patch(provider);
+    add_name_and_id_to_model_records(provider);
+    if !include_disabled_models {
+        provider.chat_models.retain(|_, model| model.base.enabled);
+        provider.completion_models.retain(|_, model| model.base.enabled);
+    }
+}
+
+pub async fn read_providers_d(
+    prev_providers: Vec<CapsProvider>,
+    config_dir: &Path
+) -> (Vec<CapsProvider>, Vec<YamlError>) {
+    let providers_dir = config_dir.join("providers.d");
+    let mut providers = prev_providers;
+    let mut error_log = Vec::new();
+
+    let (yaml_paths, read_errors) = get_provider_yaml_paths(config_dir).await;
+    for error in read_errors {
+        error_log.push(YamlError {
+            path: providers_dir.to_string_lossy().to_string(),
+            error_line: 0,
+            error_msg: error.to_string(),
+        });
+    }
+
+    let provider_templates = get_provider_templates();
+
+    for yaml_path in yaml_paths {
+        let provider_name = match yaml_path.file_stem() {
+            Some(name) => name.to_string_lossy().to_string(),
+            None => continue,
+        };
+
+        if provider_templates.contains_key(&provider_name) {
+            match get_provider_from_template_and_config_file(config_dir, &provider_name, false, false).await {
+                Ok(provider) => {
+                    providers.push(provider);
+                },
+                Err(e) => {
+                    error_log.push(YamlError {
+                        path: yaml_path.to_string_lossy().to_string(),
+                        error_line: 0,
+                        error_msg: e,
+                    });
+                }
+            }
+        } else {
+            let content = match tokio::fs::read_to_string(&yaml_path).await {
+                Ok(content) => content,
+                Err(e) => {
+                    error_log.push(YamlError {
+                        path: yaml_path.to_string_lossy().to_string(),
+                        error_line: 0,
+                        error_msg: format!("Failed to read file: {}", e),
+                    });
+                    continue;
+                }
+            };
+
+            let mut provider: CapsProvider = match serde_yaml::from_str(&content) {
+                Ok(provider) => provider,
+                Err(e) => {
+                    error_log.push(YamlError {
+                        path: yaml_path.to_string_lossy().to_string(),
+                        error_line: e.location().map_or(0, |loc| loc.line()),
+                        error_msg: format!("Failed to parse YAML: {}", e),
+                    });
+                    continue;
+                }
+            };
+            provider.name = provider_name;
+            providers.push(provider);
+        }
+    }
+
+    (providers, error_log)
+}
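+// Naming convention sketch: the file stem selects the code path in read_providers_d.
+// providers.d/openai.yaml overlays the built-in "openai" template, while a stem with
+// no template, say providers.d/my_llm.yaml (name assumed), must be a complete
+// standalone CapsProvider:
+//     let (providers, errors) = read_providers_d(Vec::new(), &config_dir).await;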
+fn add_running_models(provider: &mut CapsProvider) {
+    let models_to_add = vec![
+        &provider.defaults.chat_default_model,
+        &provider.defaults.chat_light_model,
+        &provider.defaults.chat_thinking_model,
+        &provider.defaults.completion_default_model,
+    ];
+
+    for model in models_to_add {
+        if !model.is_empty() && !provider.running_models.contains(model) {
+            provider.running_models.push(model.clone());
+        }
+    }
+}
+
+/// Returns the latest modification timestamp in seconds of any YAML file in the providers.d directory
+pub async fn get_latest_provider_mtime(config_dir: &Path) -> Option<u64> {
+    let (yaml_paths, reading_errors) = get_provider_yaml_paths(config_dir).await;
+
+    for error in reading_errors {
+        tracing::error!("{error}");
+    }
+
+    let mut latest_mtime = None;
+    for path in yaml_paths {
+        match tokio::fs::metadata(&path).await {
+            Ok(metadata) => {
+                if let Ok(mtime) = metadata.modified() {
+                    latest_mtime = match latest_mtime {
+                        Some(current_latest) if mtime > current_latest => Some(mtime),
+                        None => Some(mtime),
+                        _ => latest_mtime,
+                    };
+                }
+            },
+            Err(e) => {
+                tracing::error!("Failed to get metadata for {}: {}", path.display(), e);
+            }
+        }
+    }
+
+    latest_mtime.map(|mtime| mtime.duration_since(std::time::UNIX_EPOCH).unwrap().as_secs())
+}
+
+pub fn add_models_to_caps(caps: &mut CodeAssistantCaps, providers: Vec<CapsProvider>) {
+    fn add_provider_details_to_model(base_model_rec: &mut BaseModelRecord, provider: &CapsProvider, model_name: &str, endpoint: &str) {
+        base_model_rec.api_key = provider.api_key.clone();
+        base_model_rec.tokenizer_api_key = provider.tokenizer_api_key.clone();
+        base_model_rec.endpoint = endpoint.replace("$MODEL", model_name);
+        base_model_rec.support_metadata = provider.support_metadata;
+        base_model_rec.endpoint_style = provider.endpoint_style.clone();
+    }
+
+    for mut provider in providers {
+
+        let completion_models = std::mem::take(&mut provider.completion_models);
+        for (model_name, mut model_rec) in completion_models {
+            if model_rec.base.endpoint.is_empty() {
+                add_provider_details_to_model(
+                    &mut model_rec.base, &provider, &model_name, &provider.completion_endpoint
+                );
+
+                if provider.code_completion_n_ctx > 0 && provider.code_completion_n_ctx < model_rec.base.n_ctx {
+                    // the model is capable of more, but the server or provider may limit it, e.g. for latency
+                    model_rec.base.n_ctx = provider.code_completion_n_ctx;
+                }
+            }
+
+            caps.completion_models.insert(model_rec.base.id.clone(), Arc::new(model_rec));
+        }
+
+        let chat_models = std::mem::take(&mut provider.chat_models);
+        for (model_name, mut model_rec) in chat_models {
+            if model_rec.base.endpoint.is_empty() {
+                add_provider_details_to_model(
+                    &mut model_rec.base, &provider, &model_name, &provider.chat_endpoint
+                );
+            }
+
+            caps.chat_models.insert(model_rec.base.id.clone(), Arc::new(model_rec));
+        }
+
+        if provider.embedding_model.is_configured() && provider.embedding_model.base.enabled {
+            let mut embedding_model = std::mem::take(&mut provider.embedding_model);
+
+            if embedding_model.base.endpoint.is_empty() {
+                let model_name = embedding_model.base.name.clone();
+                add_provider_details_to_model(
+                    &mut embedding_model.base, &provider, &model_name, &provider.embedding_endpoint
+                );
+            }
+            caps.embedding_model = embedding_model;
+        }
+
+        caps.defaults.apply_override(&provider.defaults, Some(&provider.name));
+    }
+}
+
+fn add_name_and_id_to_model_records(provider: &mut CapsProvider) {
+    for (model_name, model_rec) in &mut provider.completion_models {
+        model_rec.base.name = model_name.to_string();
+        model_rec.base.id = format!("{}/{}", provider.name, model_name);
+    }
+
+    for (model_name, model_rec) in &mut provider.chat_models {
+        model_rec.base.name = model_name.to_string();
+        model_rec.base.id = format!("{}/{}", provider.name, model_name);
+    }
+
+    if provider.embedding_model.is_configured() {
+        provider.embedding_model.base.id = format!("{}/{}", provider.name, provider.embedding_model.base.name);
+    }
+}
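+// models_dict_patch sketch (payload assumed): the cloud can send a per-model patch
+// such as {"gpt-4o": {"n_ctx": 64000}}; apply_models_dict_patch below copies the
+// patched n_ctx (and, for chat models, supports_tools / supports_multimodality)
+// onto already-populated records.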
rec_patched.get("supports_tools").and_then(|v| v.as_bool()) { + chat_rec.supports_tools = supports_tools; + } + if let Some(supports_multimodality) = rec_patched.get("supports_multimodality").and_then(|v| v.as_bool()) { + chat_rec.supports_multimodality = supports_multimodality; + } + } + } +} + +#[derive(Deserialize)] +pub struct KnownModels { + pub completion_models: IndexMap, + pub chat_models: IndexMap, + pub embedding_models: IndexMap, +} +const UNPARSED_KNOWN_MODELS: &'static str = include_str!("../known_models.json"); +static KNOWN_MODELS: OnceLock = OnceLock::new(); + +pub fn get_known_models() -> &'static KnownModels { + KNOWN_MODELS.get_or_init(|| { + serde_json::from_str::(UNPARSED_KNOWN_MODELS).map_err(|e| { + let up_to_line = UNPARSED_KNOWN_MODELS.lines().take(e.line()).collect::>().join("\n"); + panic!("{}\nfailed to parse KNOWN_MODELS: {}", up_to_line, e); + }).unwrap() + }) +} + +fn populate_model_records(provider: &mut CapsProvider) { + let known_models = get_known_models(); + + for model_name in &provider.running_models { + if !provider.completion_models.contains_key(model_name) { + if let Some(model_rec) = find_model_match(model_name, &provider.completion_models, &known_models.completion_models) { + provider.completion_models.insert(model_name.clone(), model_rec); + } + } + + if !provider.chat_models.contains_key(model_name) { + if let Some(model_rec) = find_model_match(model_name, &provider.chat_models, &known_models.chat_models) { + provider.chat_models.insert(model_name.clone(), model_rec); + } + } + } + + for model in &provider.running_models { + if !provider.completion_models.contains_key(model) && + !provider.chat_models.contains_key(model) && + !(model == &provider.embedding_model.base.name) { + tracing::warn!("Indicated as running, unknown model {:?} for provider {}, maybe update this rust binary", model, provider.name); + } + } + + if !provider.embedding_model.is_configured() && !provider.embedding_model.base.name.is_empty() { + let model_name = provider.embedding_model.base.name.clone(); + if let Some(model_rec) = find_model_match(&model_name, &IndexMap::new(), &known_models.embedding_models) { + provider.embedding_model = model_rec; + provider.embedding_model.base.name = model_name; + } else { + tracing::warn!("Unknown embedding model '{}', maybe configure it or update this binary", model_name); + } + } +} + +fn find_model_match( + model_name: &String, + provider_models: &IndexMap, + known_models: &IndexMap +) -> Option { + let model_stripped = strip_model_from_finetune(model_name); + + if let Some(model) = provider_models.get(model_name) + .or_else(|| provider_models.get(&model_stripped)) { + return Some(model.clone()); + } + + for model in provider_models.values() { + if model.base().similar_models.contains(model_name) || + model.base().similar_models.contains(&model_stripped) { + return Some(model.clone()); + } + } + + if let Some(model) = known_models.get(model_name) + .or_else(|| known_models.get(&model_stripped)) { + return Some(model.clone()); + } + + for model in known_models.values() { + if model.base().similar_models.contains(&model_name.to_string()) || + model.base().similar_models.contains(&model_stripped) { + return Some(model.clone()); + } + } + + None +} + +pub fn resolve_api_key(provider: &CapsProvider, key: &str, fallback: &str, key_name: &str) -> String { + match key { + k if k.is_empty() => fallback.to_string(), + k if k.starts_with("$") => { + match std::env::var(&k[1..]) { + Ok(env_val) => env_val, + Err(e) => { + tracing::error!( + "tried 
+pub fn resolve_api_key(provider: &CapsProvider, key: &str, fallback: &str, key_name: &str) -> String {
+    match key {
+        k if k.is_empty() => fallback.to_string(),
+        k if k.starts_with("$") => {
+            match std::env::var(&k[1..]) {
+                Ok(env_val) => env_val,
+                Err(e) => {
+                    tracing::error!(
+                        "tried to read {} from env var {} for provider {}, but failed: {}",
+                        key_name, k, provider.name, e
+                    );
+                    fallback.to_string()
+                }
+            }
+        }
+        k => k.to_string(),
+    }
+}
+
+pub fn resolve_provider_api_key(provider: &CapsProvider, cmdline_api_key: &str) -> String {
+    resolve_api_key(provider, &provider.api_key, &cmdline_api_key, "API key")
+}
+
+pub fn resolve_tokenizer_api_key(provider: &CapsProvider) -> String {
+    resolve_api_key(provider, &provider.tokenizer_api_key, "", "tokenizer API key")
+}
+
+pub async fn get_provider_from_template_and_config_file(
+    config_dir: &Path, name: &str, config_file_must_exist: bool, post_process: bool
+) -> Result<CapsProvider, String> {
+    let mut provider = get_provider_templates().get(name).cloned()
+        .ok_or("Provider template not found")?;
+
+    let provider_path = config_dir.join("providers.d").join(format!("{name}.yaml"));
+    let config_file_value = match tokio::fs::read_to_string(&provider_path).await {
+        Ok(content) => {
+            serde_yaml::from_str::<serde_yaml::Value>(&content)
+                .map_err_with_prefix(format!("Error parsing file {}:", provider_path.display()))?
+        },
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound && !config_file_must_exist => {
+            serde_yaml::Value::Mapping(serde_yaml::Mapping::new())
+        },
+        Err(e) => {
+            return Err(format!("Failed to read file {}: {}", provider_path.display(), e));
+        }
+    };
+
+    provider.apply_override(config_file_value)?;
+
+    if post_process {
+        post_process_provider(&mut provider, true);
+    }
+
+    Ok(provider)
+}
+
+pub async fn get_provider_from_server(gcx: Arc<ARwLock<GlobalContext>>) -> Result<CapsProvider, String> {
+    let command_line = CommandLine::from_args();
+    let cmdline_api_key = command_line.api_key.clone();
+    let (caps_value, caps_url) = load_caps_value_from_url(command_line, gcx.clone()).await?;
+
+    if let Ok(self_hosted_caps) = serde_json::from_value::<SelfHostedCaps>(caps_value.clone()) {
+        let mut provider = self_hosted_caps.into_provider(&caps_url, &cmdline_api_key)?;
+        post_process_provider(&mut provider, true);
+        provider.api_key = resolve_provider_api_key(&provider, &cmdline_api_key);
+        provider.tokenizer_api_key = resolve_tokenizer_api_key(&provider);
+        Ok(provider)
+    } else {
+        let mut provider = serde_json::from_value::<CapsProvider>(caps_value).map_err_to_string()?;
+
+        resolve_relative_urls(&mut provider, &caps_url)?;
+        post_process_provider(&mut provider, true);
+        provider.api_key = resolve_provider_api_key(&provider, &cmdline_api_key);
+        provider.tokenizer_api_key = resolve_tokenizer_api_key(&provider);
+        Ok(provider)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_provider_templates() {
+        let _ = get_provider_templates();  // This will panic if any template fails to parse
+    }
+
+    #[test]
+    fn test_parse_known_models() {
+        let _ = get_known_models();  // This will panic if any model fails to parse
+    }
+}
diff --git a/refact-agent/engine/src/caps/self_hosted.rs b/refact-agent/engine/src/caps/self_hosted.rs
new file mode 100644
index 000000000..6bedfa265
--- /dev/null
+++ b/refact-agent/engine/src/caps/self_hosted.rs
@@ -0,0 +1,370 @@
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use indexmap::IndexMap;
+use serde::Deserialize;
+
+use crate::caps::{
+    BaseModelRecord, ChatModelRecord, CodeAssistantCaps, CompletionModelRecord, DefaultModels,
+    EmbeddingModelRecord, default_chat_scratchpad, default_completion_scratchpad,
+    default_completion_scratchpad_patch, default_embedding_batch, default_hf_tokenizer_template,
+    default_rejection_threshold, relative_to_full_url, normalize_string, resolve_relative_urls
+};
+use crate::caps::providers;
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct SelfHostedCapsModelRecord {
+    pub n_ctx: usize,
+
+    #[serde(default)]
+    pub supports_scratchpads: HashMap<String, serde_json::Value>,
+
+    #[serde(default)]
+    pub supports_tools: bool,
+
+    #[serde(default)]
+    pub supports_multimodality: bool,
+
+    #[serde(default)]
+    pub supports_clicks: bool,
+
+    #[serde(default)]
+    pub supports_agent: bool,
+
+    #[serde(default)]
+    pub supports_reasoning: Option<String>,
+
+    #[serde(default)]
+    pub supports_boost_reasoning: bool,
+
+    #[serde(default)]
+    pub default_temperature: Option<f32>,
+}
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct SelfHostedCapsEmbeddingModelRecord {
+    pub n_ctx: usize,
+    pub size: i32,
+}
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct SelfHostedCapsCompletion {
+    pub endpoint: String,
+    pub models: IndexMap<String, SelfHostedCapsModelRecord>,
+    pub default_model: String,
+}
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct SelfHostedCapsChat {
+    pub endpoint: String,
+    pub models: IndexMap<String, SelfHostedCapsModelRecord>,
+    pub default_model: String,
+}
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct SelfHostedCapsEmbedding {
+    pub endpoint: String,
+    pub models: IndexMap<String, SelfHostedCapsEmbeddingModelRecord>,
+    pub default_model: String,
+}
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct SelfHostedCapsTelemetryEndpoints {
+    pub telemetry_basic_endpoint: String,
+    pub telemetry_basic_retrieve_my_own_endpoint: String,
+}
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct SelfHostedCaps {
+    #[serde(deserialize_with = "normalize_string")]
+    pub cloud_name: String,
+
+    pub completion: SelfHostedCapsCompletion,
+    pub chat: SelfHostedCapsChat,
+    pub embedding: SelfHostedCapsEmbedding,
+
+    pub telemetry_endpoints: SelfHostedCapsTelemetryEndpoints,
+    pub tokenizer_endpoints: HashMap<String, String>,
+
+    #[serde(default)]
+    pub customization: String,
+    pub caps_version: i64,
+}
+
+fn configure_base_model(
+    base_model: &mut BaseModelRecord,
+    model_name: &str,
+    endpoint: &str,
+    cloud_name: &str,
+    tokenizer_endpoints: &HashMap<String, String>,
+    caps_url: &String,
+    cmdline_api_key: &str,
+) -> Result<(), String> {
+    base_model.name = model_name.to_string();
+    base_model.id = format!("{}/{}", cloud_name, model_name);
+    if base_model.endpoint.is_empty() {
+        base_model.endpoint = relative_to_full_url(caps_url, &endpoint.replace("$MODEL", model_name))?;
+    }
+    if let Some(tokenizer) = tokenizer_endpoints.get(&base_model.name) {
+        base_model.tokenizer = relative_to_full_url(caps_url, &tokenizer)?;
+    }
+    base_model.api_key = cmdline_api_key.to_string();
+    base_model.endpoint_style = "openai".to_string();
+    Ok(())
+}
+
+impl SelfHostedCapsModelRecord {
+    fn get_completion_scratchpad(&self) -> (String, serde_json::Value) {
+        if !self.supports_scratchpads.is_empty() {
+            let scratchpad_name = self.supports_scratchpads.keys().next().unwrap_or(&default_completion_scratchpad()).clone();
+            let scratchpad_patch = self.supports_scratchpads.values().next().unwrap_or(&serde_json::Value::Null).clone();
+            (scratchpad_name, scratchpad_patch)
+        } else {
+            (default_completion_scratchpad(), default_completion_scratchpad_patch())
+        }
+    }
+
+    fn get_chat_scratchpad(&self) -> (String, serde_json::Value) {
+        if !self.supports_scratchpads.is_empty() {
+            let scratchpad_name = self.supports_scratchpads.keys().next().unwrap_or(&default_chat_scratchpad()).clone();
+            let scratchpad_patch = self.supports_scratchpads.values().next().unwrap_or(&serde_json::Value::Null).clone();
+            (scratchpad_name, scratchpad_patch)
+        } else {
+            (default_chat_scratchpad(), serde_json::Value::Null)
+        }
+    }
+
+    pub fn into_completion_model(
+        &self,
+        model_name: &str,
+        self_hosted_caps: &SelfHostedCaps,
+        caps_url: &String,
+        cmdline_api_key: &str,
+    ) -> Result<CompletionModelRecord, String> {
+        let mut base = BaseModelRecord {
+            n_ctx: self.n_ctx,
+            enabled: true,
+            ..Default::default()
+        };
+
+        configure_base_model(
+            &mut base,
+            model_name,
+            &self_hosted_caps.completion.endpoint,
+            &self_hosted_caps.cloud_name,
+            &self_hosted_caps.tokenizer_endpoints,
+            caps_url,
+            cmdline_api_key,
+        )?;
+
+        let (scratchpad, scratchpad_patch) = self.get_completion_scratchpad();
+
+        Ok(CompletionModelRecord {
+            base,
+            scratchpad,
+            scratchpad_patch,
+            model_family: None,
+        })
+    }
+}
+
+impl SelfHostedCapsModelRecord {
+    pub fn into_chat_model(
+        &self,
+        model_name: &str,
+        self_hosted_caps: &SelfHostedCaps,
+        caps_url: &String,
+        cmdline_api_key: &str,
+    ) -> Result<ChatModelRecord, String> {
+        let mut base = BaseModelRecord {
+            n_ctx: self.n_ctx,
+            enabled: true,
+            ..Default::default()
+        };
+
+        let (scratchpad, scratchpad_patch) = self.get_chat_scratchpad();
+
+        // Non-passthrough models don't support `/v1/chat/completions` endpoints in openai style, only `/v1/completions`
+        let endpoint_to_use = if scratchpad == "PASSTHROUGH" {
+            &self_hosted_caps.chat.endpoint
+        } else {
+            &self_hosted_caps.completion.endpoint
+        };
+
+        configure_base_model(
+            &mut base,
+            model_name,
+            endpoint_to_use,
+            &self_hosted_caps.cloud_name,
+            &self_hosted_caps.tokenizer_endpoints,
+            caps_url,
+            cmdline_api_key,
+        )?;
+
+        Ok(ChatModelRecord {
+            base,
+            scratchpad,
+            scratchpad_patch,
+            supports_tools: self.supports_tools,
+            supports_multimodality: self.supports_multimodality,
+            supports_clicks: self.supports_clicks,
+            supports_agent: self.supports_agent,
+            supports_reasoning: self.supports_reasoning.clone(),
+            supports_boost_reasoning: self.supports_boost_reasoning,
+            default_temperature: self.default_temperature,
+        })
+    }
+}
+
+impl SelfHostedCapsEmbeddingModelRecord {
+    pub fn into_embedding_model(
+        &self,
+        model_name: &str,
+        self_hosted_caps: &SelfHostedCaps,
+        caps_url: &String,
+        cmdline_api_key: &str,
+    ) -> Result<EmbeddingModelRecord, String> {
+        let mut embedding_model = EmbeddingModelRecord {
+            base: BaseModelRecord { n_ctx: self.n_ctx, enabled: true, ..Default::default() },
+            embedding_size: self.size,
+            rejection_threshold: default_rejection_threshold(),
+            embedding_batch: default_embedding_batch(),
+        };
+
+        configure_base_model(
+            &mut embedding_model.base,
+            model_name,
+            &self_hosted_caps.embedding.endpoint,
+            &self_hosted_caps.cloud_name,
+            &self_hosted_caps.tokenizer_endpoints,
+            caps_url,
+            cmdline_api_key,
+        )?;
+
+        Ok(embedding_model)
+    }
+}
+
+
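+// Conversion sketch (names assumed): for a self-hosted server whose normalized
+// cloud_name is "my_server" and chat.default_model is "qwen2.5-coder", into_caps
+// below produces prefixed defaults:
+//     defaults.chat_default_model == "my_server/qwen2.5-coder"
+// chat_light_model gets the same value, and chat_thinking_model stays empty.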
+impl SelfHostedCaps {
+    pub fn into_caps(self, caps_url: &String, cmdline_api_key: &str) -> Result<CodeAssistantCaps, String> {
+        let mut caps = CodeAssistantCaps {
+            cloud_name: self.cloud_name.clone(),
+
+            telemetry_basic_dest: relative_to_full_url(caps_url, &self.telemetry_endpoints.telemetry_basic_endpoint)?,
+            telemetry_basic_retrieve_my_own: relative_to_full_url(caps_url, &self.telemetry_endpoints.telemetry_basic_retrieve_my_own_endpoint)?,
+
+            completion_models: IndexMap::new(),
+            chat_models: IndexMap::new(),
+            embedding_model: EmbeddingModelRecord::default(),
+
+            defaults: DefaultModels {
+                completion_default_model: format!("{}/{}", self.cloud_name, self.completion.default_model),
+                chat_default_model: format!("{}/{}", self.cloud_name, self.chat.default_model),
+                chat_thinking_model: String::new(),
+                chat_light_model: format!("{}/{}", self.cloud_name, self.chat.default_model),
+            },
+            customization: self.customization.clone(),
+            caps_version: self.caps_version,
+
+            hf_tokenizer_template: default_hf_tokenizer_template(),
+        };
+
+        for (model_name, model_rec) in &self.completion.models {
+            let completion_model = model_rec.into_completion_model(
+                model_name,
+                &self,
+                caps_url,
+                cmdline_api_key,
+            )?;
+
+            caps.completion_models.insert(completion_model.base.id.clone(), Arc::new(completion_model));
+        }
+
+        for (model_name, model_rec) in &self.chat.models {
+            let chat_model = model_rec.into_chat_model(
+                model_name,
+                &self,
+                caps_url,
+                cmdline_api_key,
+            )?;
+
+            caps.chat_models.insert(chat_model.base.id.clone(), Arc::new(chat_model));
+        }
+
+        if let Some((model_name, model_rec)) = self.embedding.models.get_key_value(&self.embedding.default_model) {
+            let embedding_model = model_rec.into_embedding_model(
+                model_name,
+                &self,
+                caps_url,
+                cmdline_api_key,
+            )?;
+            caps.embedding_model = embedding_model;
+        }
+
+        Ok(caps)
+    }
+
+    pub fn into_provider(self, caps_url: &String, cmdline_api_key: &str) -> Result<providers::CapsProvider, String> {
+        let mut provider = providers::CapsProvider {
+            name: self.cloud_name.clone(),
+            enabled: true,
+            supports_completion: true,
+            endpoint_style: "openai".to_string(),
+            completion_endpoint: self.completion.endpoint.clone(),
+            chat_endpoint: self.chat.endpoint.clone(),
+            embedding_endpoint: self.embedding.endpoint.clone(),
+            api_key: cmdline_api_key.to_string(),
+            tokenizer_api_key: cmdline_api_key.to_string(),
+            code_completion_n_ctx: 0,
+            support_metadata: false,
+            completion_models: IndexMap::new(),
+            chat_models: IndexMap::new(),
+            embedding_model: EmbeddingModelRecord::default(),
+            models_dict_patch: IndexMap::new(),
+            defaults: DefaultModels {
+                completion_default_model: self.completion.default_model.clone(),
+                chat_default_model: self.chat.default_model.clone(),
+                chat_thinking_model: String::new(),
+                chat_light_model: String::new(),
+            },
+            running_models: Vec::new(),
+        };
+
+        for (model_name, model_rec) in &self.completion.models {
+            let completion_model = model_rec.into_completion_model(
+                model_name,
+                &self,
+                caps_url,
+                cmdline_api_key,
+            )?;
+
+            provider.completion_models.insert(model_name.clone(), completion_model);
+        }
+
+        for (model_name, model_rec) in &self.chat.models {
+            let chat_model = model_rec.into_chat_model(
+                model_name,
+                &self,
+                caps_url,
+                cmdline_api_key,
+            )?;
+
+            provider.chat_models.insert(model_name.clone(), chat_model);
+        }
+
+        if let Some((model_name, model_rec)) = self.embedding.models.get_key_value(&self.embedding.default_model) {
+            let embedding_model = model_rec.into_embedding_model(
+                model_name,
+                &self,
+                caps_url,
+                cmdline_api_key,
+            )?;
+            provider.embedding_model = embedding_model;
+        }
+
+        resolve_relative_urls(&mut provider, caps_url)?;
+
+        Ok(provider)
+    }
+}
diff --git a/refact-agent/engine/src/custom_error.rs b/refact-agent/engine/src/custom_error.rs
index 454aef962..191c5b763 100644
--- a/refact-agent/engine/src/custom_error.rs
+++ b/refact-agent/engine/src/custom_error.rs
@@ -1,4 +1,5 @@
 use std::error::Error;
+use serde::Serialize;
 use hyper::StatusCode;
 use serde_json::json;
 use std::fmt;
@@ -52,6 +53,35 @@ impl ScratchError {
     }
 }
 
+#[derive(Serialize, Default)]
+pub struct YamlError {
+    pub path: String,
+    pub error_line: usize,  // starts with 1, zero if invalid
+    pub error_msg: String,
+}
+
+impl From<(&str, &serde_yaml::Error)> for YamlError {
+    fn from((path, err): (&str, &serde_yaml::Error)) -> Self {
+        YamlError {
+            path: path.to_string(),
+            error_line: err.location().map(|loc| loc.line()).unwrap_or(0),
+            error_msg: err.to_string(),
+        }
+    }
+}
+
+impl fmt::Display for YamlError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{}:{} {:?}",
+            crate::nicer_logs::last_n_chars(&self.path, 40),
+            self.error_line,
+            self.error_msg
+        )
+    }
+}
+
 pub trait MapErrToString<T> {
     /// Same as .map_err(|e| e.to_string())
     fn map_err_to_string(self) -> Result<T, String>;
diff --git a/refact-agent/engine/src/fetch_embedding.rs b/refact-agent/engine/src/fetch_embedding.rs
index 81a00bd08..38a8b4c45 100644
--- a/refact-agent/engine/src/fetch_embedding.rs
+++ b/refact-agent/engine/src/fetch_embedding.rs
@@ -3,22 +3,20 @@ use std::sync::Arc;
 use tokio::sync::Mutex as AMutex;
 use tracing::error;
 
+use crate::caps::EmbeddingModelRecord;
 use crate::forward_to_hf_endpoint::get_embedding_hf_style;
 use crate::forward_to_openai_endpoint::get_embedding_openai_style;
 
 pub async fn get_embedding(
     client: Arc<AMutex<reqwest::Client>>,
-    endpoint_embeddings_style: &String,
-    model_name: &String,
-    endpoint_template: &String,
+    embedding_model: &EmbeddingModelRecord,
     text: Vec<String>,
-    api_key: &String,
 ) -> Result<Vec<Vec<f32>>, String> {
-    match endpoint_embeddings_style.to_lowercase().as_str() {
-        "hf" => get_embedding_hf_style(client, text, endpoint_template, model_name, api_key).await,
-        "openai" => get_embedding_openai_style(client, text, endpoint_template, model_name, api_key).await,
+    match embedding_model.base.endpoint_style.to_lowercase().as_str() {
+        "hf" => get_embedding_hf_style(client, text, embedding_model).await,
+        "openai" => get_embedding_openai_style(client, text, embedding_model).await,
         _ => {
-            error!("Invalid endpoint_embeddings_style: {}", endpoint_embeddings_style);
+            error!("Invalid endpoint_embeddings_style: {}", embedding_model.base.endpoint_style);
             Err("Invalid endpoint_embeddings_style".to_string())
         }
     }
@@ -29,13 +27,10 @@ const SLEEP_ON_BATCH_ONE: u64 = 100;
 
 // HF often returns 500 errors for no reason
-pub async fn get_embedding_with_retry(
+pub async fn get_embedding_with_retries(
     client: Arc<AMutex<reqwest::Client>>,
-    endpoint_embeddings_style: &String,
-    model_name: &String,
-    endpoint_template: &String,
+    embedding_model: &EmbeddingModelRecord,
     text: Vec<String>,
-    api_key: &String,
     max_retries: usize,
 ) -> Result<Vec<Vec<f32>>, String> {
     let mut attempt_n = 0;
@@ -43,11 +38,8 @@ pub async fn get_embedding_with_retry(
         attempt_n += 1;
         match get_embedding(
             client.clone(),
-            endpoint_embeddings_style,
-            model_name,
-            endpoint_template,
+            embedding_model,
             text.clone(),
-            api_key,
         ).await {
             Ok(embedding) => return Ok(embedding),
             Err(e) => {
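The embedding path now threads one EmbeddingModelRecord through instead of four loose strings. A call-site sketch (client and caps values assumed, not taken from this diff):

    let vectors = get_embedding_with_retries(
        client.clone(),
        &caps.embedding_model,
        vec!["fn main() {}".to_string()],
        5,
    ).await?;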
diff --git a/refact-agent/engine/src/forward_to_hf_endpoint.rs b/refact-agent/engine/src/forward_to_hf_endpoint.rs
index 86bc8c348..9e830cd9a 100644
--- a/refact-agent/engine/src/forward_to_hf_endpoint.rs
+++ b/refact-agent/engine/src/forward_to_hf_endpoint.rs
@@ -8,27 +8,24 @@ use serde_json::json;
 use tokio::sync::Mutex as AMutex;
 
 use crate::call_validation::{ChatMeta, SamplingParameters};
+use crate::caps::BaseModelRecord;
+use crate::caps::EmbeddingModelRecord;
 
 // Idea: use USER_AGENT
 // let user_agent = format!("{NAME}/{VERSION}; rust/unknown; ide/{ide:?}");
 
 pub async fn forward_to_hf_style_endpoint(
-    save_url: &mut String,
-    bearer: String,
-    model_name: &str,
+    model_rec: &BaseModelRecord,
     prompt: &str,
     client: &reqwest::Client,
-    endpoint_template: &String,
     sampling_parameters: &SamplingParameters,
     meta: Option<ChatMeta>
 ) -> Result<serde_json::Value, String> {
-    let url = endpoint_template.replace("$MODEL", model_name);
-    save_url.clone_from(&&url);
     let mut headers = HeaderMap::new();
     headers.insert(CONTENT_TYPE, HeaderValue::from_str("application/json").unwrap());
-    if !bearer.is_empty() {
-        headers.insert(AUTHORIZATION, HeaderValue::from_str(format!("Bearer {}", bearer).as_str()).unwrap());
+    if !model_rec.api_key.is_empty() {
+        headers.insert(AUTHORIZATION, HeaderValue::from_str(&format!("Bearer {}", model_rec.api_key)).unwrap());
     }
     let params_string = serde_json::to_string(sampling_parameters).unwrap();
     let mut params_json = serde_json::from_str::<serde_json::Value>(&params_string).unwrap();
@@ -42,7 +39,7 @@ pub async fn forward_to_hf_style_endpoint(
         data["meta"] = serde_json::to_value(meta).unwrap();
     }
 
-    let req = client.post(&url)
+    let req = client.post(&model_rec.endpoint)
         .headers(headers)
         .body(data.to_string())
         .send()
@@ -50,34 +47,29 @@ pub async fn forward_to_hf_style_endpoint(
     let resp = req.map_err(|e| format!("{}", e))?;
     let status_code = resp.status().as_u16();
     let response_txt = resp.text().await.map_err(|e|
-        format!("reading from socket {}: {}", url, e)
+        format!("reading from socket {}: {}", model_rec.endpoint, e)
     )?;
     if status_code != 200 {
-        return Err(format!("{} status={} text {}", url, status_code, response_txt));
+        return Err(format!("{} status={} text {}", model_rec.endpoint, status_code, response_txt));
     }
     Ok(match serde_json::from_str(&response_txt) {
         Ok(json) => json,
-        Err(e) => return Err(format!("{}: {}", url, e)),
+        Err(e) => return Err(format!("{}: {}", model_rec.endpoint, e)),
     })
 }
 
 pub async fn forward_to_hf_style_endpoint_streaming(
-    save_url: &mut String,
-    bearer: String,
-    model_name: &str,
+    model_rec: &BaseModelRecord,
     prompt: &str,
     client: &reqwest::Client,
-    endpoint_template: &String,
     sampling_parameters: &SamplingParameters,
     meta: Option<ChatMeta>
 ) -> Result<EventSource, String> {
-    let url = endpoint_template.replace("$MODEL", model_name);
-    save_url.clone_from(&&url);
     let mut headers = HeaderMap::new();
     headers.insert(CONTENT_TYPE, HeaderValue::from_str("application/json").unwrap());
-    if !bearer.is_empty() {
-        headers.insert(AUTHORIZATION, HeaderValue::from_str(format!("Bearer {}", bearer).as_str()).unwrap());
+    if !model_rec.api_key.is_empty() {
+        headers.insert(AUTHORIZATION, HeaderValue::from_str(&format!("Bearer {}", model_rec.api_key)).unwrap());
     }
     let params_string = serde_json::to_string(sampling_parameters).unwrap();
     let mut params_json = serde_json::from_str::<serde_json::Value>(&params_string).unwrap();
@@ -92,11 +84,11 @@ pub async fn forward_to_hf_style_endpoint_streaming(
         data["meta"] = serde_json::to_value(meta).unwrap();
     }
 
-    let builder = client.post(&url)
+    let builder = client.post(&model_rec.endpoint)
         .headers(headers)
         .body(data.to_string());
     let event_source: EventSource = EventSource::new(builder).map_err(|e|
-        format!("can't stream from {}: {}", url, e)
+        format!("can't stream from {}: {}", model_rec.endpoint, e)
     )?;
     Ok(event_source)
 }
@@ -125,16 +117,13 @@ struct EmbeddingsPayloadHF {
 
 pub async fn get_embedding_hf_style(
     client: std::sync::Arc<AMutex<reqwest::Client>>,
     text: Vec<String>,
-    endpoint_template: &String,
-    model_name: &String,
-    api_key: &String,
+    model: &EmbeddingModelRecord,
 ) -> Result<Vec<Vec<f32>>, String> {
     let payload = EmbeddingsPayloadHF { inputs: text, options: EmbeddingsPayloadHFOptions::new() };
-    let url = endpoint_template.clone().replace("$MODEL", &model_name);
 
     let maybe_response = client.lock().await
-        .post(&url)
-        .bearer_auth(api_key.clone())
+        .post(&model.base.endpoint)
+        .bearer_auth(model.base.api_key.clone())
         .json(&payload)
         .send()
         .await;
diff --git a/refact-agent/engine/src/forward_to_openai_endpoint.rs b/refact-agent/engine/src/forward_to_openai_endpoint.rs
index 6721c7aef..8879a2188 100644
--- a/refact-agent/engine/src/forward_to_openai_endpoint.rs
+++ b/refact-agent/engine/src/forward_to_openai_endpoint.rs
@@ -10,33 +10,29 @@ use tokio::sync::Mutex as AMutex;
 use tracing::info;
 
 use crate::call_validation::{ChatMeta, SamplingParameters};
+use crate::caps::BaseModelRecord;
+use crate::custom_error::MapErrToString;
 use crate::scratchpads::chat_utils_limit_history::CompressionStrength;
+use crate::caps::EmbeddingModelRecord;
 
 pub async fn forward_to_openai_style_endpoint(
-    save_url: &mut String,
-    bearer: String,
-    model_name: &str,
+    model_rec: &BaseModelRecord,
     prompt: &str,
     client: &reqwest::Client,
-    endpoint_template: &String,
-    endpoint_chat_passthrough: &String,
     sampling_parameters: &SamplingParameters,
-    is_metadata_supported: bool,
     meta: Option<ChatMeta>
 ) -> Result<serde_json::Value, String> {
     let is_passthrough = prompt.starts_with("PASSTHROUGH ");
-    let url = if !is_passthrough { endpoint_template.replace("$MODEL", model_name) } else { endpoint_chat_passthrough.clone() };
-    save_url.clone_from(&&url);
     let mut headers = HeaderMap::new();
     headers.insert(CONTENT_TYPE, HeaderValue::from_str("application/json").unwrap());
-    if !bearer.is_empty() {
-        headers.insert(AUTHORIZATION, HeaderValue::from_str(format!("Bearer {}", bearer).as_str()).unwrap());
+    if !model_rec.api_key.is_empty() {
+        headers.insert(AUTHORIZATION, HeaderValue::from_str(&format!("Bearer {}", model_rec.api_key)).unwrap());
     }
-    if is_metadata_supported {
-        headers.insert(USER_AGENT, HeaderValue::from_str(format!("refact-lsp {}", crate::version::build_info::PKG_VERSION).as_str()).unwrap());
+    if model_rec.support_metadata {
+        headers.insert(USER_AGENT, HeaderValue::from_str(&format!("refact-lsp {}", crate::version::build_info::PKG_VERSION)).unwrap());
     }
     let mut data = json!({
-        "model": model_name,
+        "model": model_rec.name.clone(),
         "stream": false,
     });
     if !sampling_parameters.stop.is_empty() { // openai does not like empty stop
@@ -49,15 +45,15 @@ pub async fn forward_to_openai_style_endpoint(
         data["reasoning_effort"] = serde_json::Value::String(reasoning_effort.to_string());
     } else if let Some(thinking) = sampling_parameters.thinking.clone() {
         data["thinking"] = thinking.clone();
-    } else {
-        data["temperature"] = serde_json::Value::from(sampling_parameters.temperature);
+    } else if let Some(temperature) = sampling_parameters.temperature {
+        data["temperature"] = serde_json::Value::from(temperature);
     }
     data["max_completion_tokens"] = serde_json::Value::from(sampling_parameters.max_new_tokens);
     info!("NOT STREAMING TEMP {}", sampling_parameters.temperature
         .map(|x| x.to_string())
         .unwrap_or("None".to_string()));
     if is_passthrough {
-        passthrough_messages_to_json(&mut data, prompt, model_name);
+        passthrough_messages_to_json(&mut data, prompt, &model_rec.name);
     } else {
         data["prompt"] = serde_json::Value::String(prompt.to_string());
         data["echo"] = serde_json::Value::Bool(false);
@@ -67,23 +63,23 @@ pub async fn forward_to_openai_style_endpoint(
     }
 
     // When cancelling requests, coroutine ususally gets aborted here on the following line.
-    let req = client.post(&url)
+    let req = client.post(&model_rec.endpoint)
         .headers(headers)
         .body(data.to_string())
         .send()
         .await;
-    let resp = req.map_err(|e| format!("{}", e))?;
+    let resp = req.map_err_to_string()?;
     let status_code = resp.status().as_u16();
     let response_txt = resp.text().await.map_err(|e|
-        format!("reading from socket {}: {}", url, e)
+        format!("reading from socket {}: {}", model_rec.endpoint, e)
     )?;
     // 400 "client error" is likely a json that we rather accept here, pick up error details as we analyse json fields at the level
     // higher, the most often 400 is no such model.
     if status_code != 200 && status_code != 400 {
-        return Err(format!("{} status={} text {}", url, status_code, response_txt));
+        return Err(format!("{} status={} text {}", model_rec.endpoint, status_code, response_txt));
     }
     if status_code != 200 {
-        info!("forward_to_openai_style_endpoint: {} {}\n{}", url, status_code, response_txt);
+        tracing::info!("forward_to_openai_style_endpoint: {} {}\n{}", model_rec.endpoint, status_code, response_txt);
     }
     let parsed_json: serde_json::Value = match serde_json::from_str(&response_txt) {
         Ok(json) => json,
@@ -93,37 +89,30 @@ pub async fn forward_to_openai_style_endpoint(
 }
 
 pub async fn forward_to_openai_style_endpoint_streaming(
-    save_url: &mut String,
-    bearer: String,
-    model_name: &str,
+    model_rec: &BaseModelRecord,
     prompt: &str,
     client: &reqwest::Client,
-    endpoint_template: &String,
-    endpoint_chat_passthrough: &String,
     sampling_parameters: &SamplingParameters,
-    is_metadata_supported: bool,
     meta: Option<ChatMeta>
 ) -> Result<EventSource, String> {
     let is_passthrough = prompt.starts_with("PASSTHROUGH ");
-    let url = if !is_passthrough { endpoint_template.replace("$MODEL", model_name) } else { endpoint_chat_passthrough.clone() };
-    save_url.clone_from(&&url);
     let mut headers = HeaderMap::new();
     headers.insert(CONTENT_TYPE, HeaderValue::from_str("application/json").unwrap());
-    if !bearer.is_empty() {
-        headers.insert(AUTHORIZATION, HeaderValue::from_str(format!("Bearer {}", bearer).as_str()).unwrap());
+    if !model_rec.api_key.is_empty() {
+        headers.insert(AUTHORIZATION, HeaderValue::from_str(&format!("Bearer {}", model_rec.api_key)).unwrap());
     }
-    if is_metadata_supported {
+    if model_rec.support_metadata {
         headers.insert(USER_AGENT, HeaderValue::from_str(format!("refact-lsp {}", crate::version::build_info::PKG_VERSION).as_str()).unwrap());
     }
     let mut data = json!({
-        "model": model_name,
+        "model": model_rec.name,
         "stream": true,
         "stream_options": {"include_usage": true},
     });
     if is_passthrough {
-        passthrough_messages_to_json(&mut data, prompt, model_name);
+        passthrough_messages_to_json(&mut data, prompt, &model_rec.name);
     } else {
         data["prompt"] = serde_json::Value::String(prompt.to_string());
     }
@@ -139,8 +128,8 @@ pub async fn forward_to_openai_style_endpoint_streaming(
         data["reasoning_effort"] = serde_json::Value::String(reasoning_effort.to_string());
     } else if let Some(thinking) = sampling_parameters.thinking.clone() {
         data["thinking"] = thinking.clone();
-    } else {
-        data["temperature"] = serde_json::Value::from(sampling_parameters.temperature);
+    } else if let Some(temperature) = sampling_parameters.temperature {
+        data["temperature"] = serde_json::Value::from(temperature);
     }
     data["max_completion_tokens"] = serde_json::Value::from(sampling_parameters.max_new_tokens);
 
@@ -151,11 +140,15 @@ pub async fn forward_to_openai_style_endpoint_streaming(
     if let Some(meta) = meta {
         data["meta"] = json!(meta);
     }
-    let builder = client.post(&url)
+
+    if model_rec.endpoint.is_empty() {
+        return Err(format!("No endpoint configured for {}", model_rec.id));
+    }
+    let builder = client.post(&model_rec.endpoint)
         .headers(headers)
         .body(data.to_string());
     let event_source: EventSource = EventSource::new(builder).map_err(|e|
-        format!("can't stream from {}: {}", url, e)
+        format!("can't stream from {}: {}", model_rec.endpoint, e)
     )?;
     Ok(event_source)
 }
@@ -207,31 +200,33 @@ struct EmbeddingsResultOpenAI {
     pub index: usize,
 }
 
+#[cfg(feature="vecdb")]
+#[derive(serde::Deserialize)]
+struct EmbeddingsResultOpenAINoIndex {
+    pub embedding: Vec<f32>,
+}
+
 #[cfg(feature="vecdb")]
 pub async fn get_embedding_openai_style(
     client: std::sync::Arc<AMutex<reqwest::Client>>,
     text: Vec<String>,
-    endpoint_template: &String,
-    model_name: &String,
-    api_key: &String,
+    model_rec: &EmbeddingModelRecord,
 ) -> Result<Vec<Vec<f32>>, String> {
-    if endpoint_template.is_empty() {
-        return Err(format!("no embedding_endpoint configured"));
+    if model_rec.base.endpoint.is_empty() {
+        return Err(format!("No embedding endpoint configured"));
     }
-    if api_key.is_empty() {
-        return Err(format!("cannot access embedding model, because api_key is empty"));
+    if model_rec.base.api_key.is_empty() {
+        return Err(format!("Cannot access embedding model, because api_key is empty"));
     }
     #[allow(non_snake_case)]
-    let B = text.len();
+    let B: usize = text.len();
     let payload = EmbeddingsPayloadOpenAI {
         input: text,
-        model: model_name.clone(),
+        model: model_rec.base.name.to_string(),
    };
-    let url = endpoint_template.clone();
-    let api_key_clone = api_key.clone();
     let response = client.lock().await
-        .post(&url)
-        .bearer_auth(api_key_clone.clone())
+        .post(&model_rec.base.endpoint)
+        .bearer_auth(&model_rec.base.api_key)
         .json(&payload)
         .send()
         .await
@@ -250,19 +245,36 @@ pub async fn get_embedding_openai_style(
 
     // info!("get_embedding_openai_style: {:?}", json);
     // {"data":[{"embedding":[0.0121664945...],"index":0,"object":"embedding"}, {}, {}]}
-    let unordered: Vec<EmbeddingsResultOpenAI> = match serde_json::from_value(json["data"].clone()) {
-        Ok(x) => x,
-        Err(err) => {
-            return Err(format!("get_embedding_openai_style: failed to parse unordered: {:?}", err));
-        }
-    };
+    // or {"data":[{"embedding":[0.0121664945...]}, {}, {}]} without index
+    let mut result: Vec<Vec<f32>> = vec![vec![]; B];
 
-    for ures in unordered.into_iter() {
-        let index = ures.index;
-        if index >= B {
-            return Err(format!("get_embedding_openai_style: index out of bounds: {:?}", json));
+    match serde_json::from_value::<Vec<EmbeddingsResultOpenAI>>(json["data"].clone()) {
+        Ok(unordered) => {
+            for ures in unordered.into_iter() {
+                let index = ures.index;
+                if index >= B {
+                    return Err(format!("get_embedding_openai_style: index out of bounds: {:?}", json));
+                }
+                result[index] = ures.embedding;
+            }
+        },
+        Err(_) => {
+            match serde_json::from_value::<Vec<EmbeddingsResultOpenAINoIndex>>(json["data"].clone()) {
+                Ok(ordered) => {
+                    if ordered.len() != B {
+                        return Err(format!("get_embedding_openai_style: response length mismatch: expected {}, got {}",
+                            B, ordered.len()));
+                    }
+                    for (i, res) in ordered.into_iter().enumerate() {
+                        result[i] = res.embedding;
+                    }
+                },
+                Err(err) => {
+                    tracing::info!("get_embedding_openai_style: failed to parse response: {:?}, {:?}", err, json);
+                    return Err(format!("get_embedding_openai_style: failed to parse response: {:?}", err));
+                }
+            }
         }
-        result[index] = ures.embedding;
     }
     Ok(result)
 }
diff --git a/refact-agent/engine/src/global_context.rs b/refact-agent/engine/src/global_context.rs
index cde382d69..e0be48c02 100644
--- a/refact-agent/engine/src/global_context.rs
+++ b/refact-agent/engine/src/global_context.rs
@@ -17,6 +17,7 @@ use tracing::{error, info};
 
 use crate::ast::ast_indexer_thread::AstIndexService;
 use crate::caps::CodeAssistantCaps;
+use crate::caps::providers::get_latest_provider_mtime;
 use crate::completion_cache::CompletionCache;
 use crate::custom_error::ScratchError;
 use crate::files_in_workspace::DocumentsState;
@@ -34,7 +35,7 @@ pub struct CommandLine {
     pub logs_stderr: bool,
     #[structopt(long, default_value="", help="Send logs to a file.")]
     pub logs_to_file: String,
-    #[structopt(long, short="u", default_value="", help="URL to start working. The first step is to fetch capabilities from $URL/refact-caps. You can supply your own caps in a local file, too, for the bring-your-own-key use case.")]
+    #[structopt(long, short="u", default_value="", help="URL to use: \"Refact\" for Refact Cloud, or your self-hosted server URL. To bring your own keys, use \"Refact\" and set up providers.")]
    pub address_url: String,
    #[structopt(long, short="k", default_value="", help="The API key to authenticate your requests, will appear in HTTP requests this binary makes.")]
    pub api_key: String,
@@ -82,7 +83,7 @@ pub struct CommandLine {
    #[structopt(long, short="w", default_value="", help="Workspace folder to find all the files. An LSP or HTTP request can override this later.")]
    pub workspace_folder: String,
-    #[structopt(long, help="create manually bring-your-own-key.yaml, customization.yaml and privacy.yaml and exit.")]
+    #[structopt(long, help="Create yaml configs such as customization.yaml and privacy.yaml, then exit.")]
    pub only_create_yaml_configs: bool,
    #[structopt(long, help="Print combined customization settings from both system defaults and customization.yaml.")]
    pub print_customization: bool,
@@ -151,11 +152,11 @@ pub struct GlobalContext {
    pub http_client_slowdown: Arc,
    pub cache_dir: PathBuf,
    pub config_dir: PathBuf,
-    pub caps: Option<Arc<StdRwLock<CodeAssistantCaps>>>,
+    pub caps: Option<Arc<CodeAssistantCaps>>,
    pub caps_reading_lock: Arc>,
    pub caps_last_error: String,
    pub caps_last_attempted_ts: u64,
-    pub tokenizer_map: HashMap<String, Arc<StdRwLock<Tokenizer>>>,
+    pub tokenizer_map: HashMap<String, Option<Arc<Tokenizer>>>,
    pub tokenizer_download_lock: Arc>,
    pub completions_cache: Arc>,
    pub telemetry: Arc>,
@@ -209,38 +210,39 @@ pub async fn migrate_to_config_folder(
pub async fn try_load_caps_quickly_if_not_present(
    gcx: Arc<ARwLock<GlobalContext>>,
    max_age_seconds: u64,
-) -> Result<Arc<StdRwLock<CodeAssistantCaps>>, ScratchError> {
+) -> Result<Arc<CodeAssistantCaps>, ScratchError> {
    let cmdline = CommandLine::from_args();   // XXX make it Arc and don't reload all the time
+    let (caps_reading_lock, config_dir) = {
+        let gcx_locked = gcx.read().await;
+        (gcx_locked.caps_reading_lock.clone(), gcx_locked.config_dir.clone())
+    };
-    let caps_reading_lock: Arc> = gcx.read().await.caps_reading_lock.clone();
    let now = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs();
    let caps_last_attempted_ts;
+    let latest_provider_mtime = get_latest_provider_mtime(&config_dir).await.unwrap_or(0);
    {
        // gcx is not locked, but a specialized async mutex is, up until caps are saved
        let _caps_reading_locked = caps_reading_lock.lock().await;
-        let caps_url = cmdline.address_url.clone();
-        if caps_url.to_lowercase() == "refact" || caps_url.starts_with("http") {
-            let max_age = if max_age_seconds > 0 { max_age_seconds } else { CAPS_BACKGROUND_RELOAD };
-            {
-                let mut cx_locked = gcx.write().await;
-                if cx_locked.caps_last_attempted_ts + max_age < now {
-                    cx_locked.caps = None;
-                    cx_locked.caps_last_attempted_ts = 0;
-                    caps_last_attempted_ts = 0;
-                } else {
-                    if let Some(caps_arc) = cx_locked.caps.clone() {
-                        return Ok(caps_arc.clone());
-                    }
-                    caps_last_attempted_ts = cx_locked.caps_last_attempted_ts;
+        let max_age = if max_age_seconds > 0 { max_age_seconds } else { CAPS_BACKGROUND_RELOAD };
+        {
+            let mut cx_locked = gcx.write().await;
+            if cx_locked.caps_last_attempted_ts + max_age < now || latest_provider_mtime >= cx_locked.caps_last_attempted_ts {
+                cx_locked.caps = None;
+                cx_locked.caps_last_attempted_ts = 0;
+                caps_last_attempted_ts = 0;
+            } else {
+                if let Some(caps_arc) = cx_locked.caps.clone() {
+                    return Ok(caps_arc.clone());
                }
-            }
-            if caps_last_attempted_ts + CAPS_RELOAD_BACKOFF > now {
-                let gcx_locked = gcx.write().await;
-                return Err(ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, gcx_locked.caps_last_error.clone()));
+                caps_last_attempted_ts = cx_locked.caps_last_attempted_ts;
            }
        }
+        if caps_last_attempted_ts + CAPS_RELOAD_BACKOFF > now {
+            let gcx_locked = gcx.write().await;
+            return Err(ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, gcx_locked.caps_last_error.clone()));
+        }
        let caps_result = crate::caps::load_caps(
            cmdline,
@@ -275,9 +277,8 @@ pub async fn look_for_piggyback_fields(
    let new_caps_version = dict.get("caps_version").and_then(|v| v.as_i64()).unwrap_or(0);
    if new_caps_version > 0 {
        if let Some(caps) = gcx_locked.caps.clone() {
-            let caps_locked = caps.read().unwrap();
-            if caps_locked.caps_version < new_caps_version {
-                info!("detected biggyback caps version {} is newer than the current version {}", new_caps_version, caps_locked.caps_version);
+            if caps.caps_version < new_caps_version {
+                info!("detected piggyback caps version {} is newer than the current version {}", new_caps_version, caps.caps_version);
                gcx_locked.caps = None;
                gcx_locked.caps_last_attempted_ts = 0;
            }
@@ -395,14 +396,4 @@ pub async fn create_global_context(
    let gcx = Arc::new(ARwLock::new(cx));
    crate::files_in_workspace::watcher_init(gcx.clone()).await;
    (gcx, ask_shutdown_receiver, cmdline)
-}
-
-pub async fn is_metadata_supported(gcx: Arc<ARwLock<GlobalContext>>) -> bool {
-    let gcx_locked = gcx.read().await;
-    if let Some(caps_arc) = gcx_locked.caps.clone() {
-        if let Ok(caps) = caps_arc.read() {
-            return caps.support_metadata;
-        }
-    }
-    false
-}
+}
\ No newline at end of file
diff --git a/refact-agent/engine/src/http/routers/v1.rs b/refact-agent/engine/src/http/routers/v1.rs
index 88c6055c3..ca8298e66 100644
--- a/refact-agent/engine/src/http/routers/v1.rs
+++ b/refact-agent/engine/src/http/routers/v1.rs
@@ -1,7 +1,5 @@
 use axum::Router;
-use axum::routing::get;
-use axum::routing::post;
-use axum::routing::delete;
+use axum::routing::{get, post, delete};
 use tower_http::cors::CorsLayer;
 use crate::http::utils::telemetry_middleware;
@@ -31,7 +29,11 @@ use crate::http::routers::v1::gui_help_handlers::handle_v1_fullpath;
 use crate::http::routers::v1::subchat::{handle_v1_subchat, handle_v1_subchat_single};
 use crate::http::routers::v1::sync_files::handle_v1_sync_files_extract_tar;
 use crate::http::routers::v1::system_prompt::handle_v1_prepend_system_prompt_and_maybe_more_initial_messages;
+use crate::http::routers::v1::providers::{handle_v1_providers, handle_v1_provider_templates,
+    handle_v1_get_model, handle_v1_get_provider, handle_v1_models, handle_v1_post_model, handle_v1_post_provider,
+    handle_v1_delete_model, handle_v1_delete_provider, handle_v1_model_default, handle_v1_completion_model_families};
 #[cfg(feature = "vecdb")]
 use crate::http::routers::v1::vecdb::{handle_v1_vecdb_search, handle_v1_vecdb_status};
 #[cfg(feature="vecdb")]
@@ -66,6 +68,7 @@ pub mod sync_files;
 pub mod system_prompt;
 pub mod telemetry_chat;
 pub mod telemetry_network;
+pub mod providers;
 mod file_edit_tools;
 #[cfg(feature = "vecdb")]
@@ -140,6 +143,18 @@ pub fn make_v1_router() -> Router {
        .route("/links", post(handle_v1_links))
        .route("/file_edit_tool_dry_run", post(handle_v1_file_edit_tool_dry_run))
+
+        .route("/providers", get(handle_v1_providers))
+        .route("/provider-templates", get(handle_v1_provider_templates))
+        .route("/provider", get(handle_v1_get_provider))
+        .route("/provider", post(handle_v1_post_provider))
+        .route("/provider", delete(handle_v1_delete_provider))
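+        // Illustrative usage, assuming this router is mounted under /v1:
+        //   GET    /v1/provider?provider-name=openai   -> one provider, merged with its template
+        //   POST   /v1/provider   (ProviderDTO body)   -> writes ~/.config/refact/providers.d/<name>.yaml
+        //   DELETE /v1/provider?provider-name=openai   -> removes that file
+        .route("/models", get(handle_v1_models))
+        .route("/model", 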
get(handle_v1_get_model)) + .route("/model", post(handle_v1_post_model)) + .route("/model", delete(handle_v1_delete_model)) + .route("/model-defaults", get(handle_v1_model_default)) + .route("/completion-model-families", get(handle_v1_completion_model_families)) // experimental .route("/get-dashboard-plots", get(get_dashboard_plots)) diff --git a/refact-agent/engine/src/http/routers/v1/at_commands.rs b/refact-agent/engine/src/http/routers/v1/at_commands.rs index 22865f61a..5bc80df18 100644 --- a/refact-agent/engine/src/http/routers/v1/at_commands.rs +++ b/refact-agent/engine/src/http/routers/v1/at_commands.rs @@ -4,7 +4,6 @@ use hyper::{Body, Response, StatusCode}; use indexmap::IndexMap; use serde::{Deserialize, Serialize}; use std::sync::Arc; -use std::sync::RwLock as StdRwLock; use serde_json::{json, Value}; use tokio::sync::RwLock as ARwLock; use tokio::sync::Mutex as AMutex; @@ -14,10 +13,11 @@ use tokenizers::Tokenizer; use tracing::info; use crate::at_commands::execute_at::run_at_commands_locally; -use crate::cached_tokenizers; +use crate::tokens; use crate::at_commands::at_commands::AtCommandsContext; use crate::at_commands::execute_at::{execute_at_commands_in_query, parse_words_from_line}; use crate::call_validation::{ChatMeta, PostprocessSettings, SubchatParameters}; +use crate::caps::resolve_chat_model; use crate::custom_error::ScratchError; use crate::global_context::try_load_caps_quickly_if_not_present; use crate::global_context::GlobalContext; @@ -49,6 +49,8 @@ struct CommandPreviewPost { #[serde(default)] model: String, #[serde(default)] + provider: String, + #[serde(default)] pub meta: ChatMeta, } @@ -129,7 +131,7 @@ pub async fn handle_v1_command_completion( .unwrap()) } -async fn count_tokens(tokenizer_arc: Arc>, messages: &Vec) -> Result { +async fn count_tokens(tokenizer_arc: Option>, messages: &Vec) -> Result { let mut accum: u64 = 0; for message in messages { @@ -169,38 +171,25 @@ pub async fn handle_v1_command_preview( }; let caps = crate::global_context::try_load_caps_quickly_if_not_present(global_context.clone(), 0).await?; - let (model_name, recommended_model_record) = { - let caps_locked = caps.read().unwrap(); - let tmp = crate::caps::which_model_to_use( - &caps_locked.code_chat_models, - &post.model, - &caps_locked.code_chat_default_model, - ); - match tmp { - Ok(x) => (x.0, x.1.clone()), - Err(e) => { - tracing::warn!("can't find model: {}", e); - return Err(ScratchError::new(StatusCode::BAD_REQUEST, format!("can't find model: {}", e)))?; - } - } - }; - let tokenizer_arc: Arc> = match cached_tokenizers::cached_tokenizer(caps.clone(), global_context.clone(), model_name.clone()).await { + let model_rec = resolve_chat_model(caps, &post.model) + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; + let tokenizer_arc = match tokens::cached_tokenizer(global_context.clone(), &model_rec.base).await { Ok(x) => x, Err(e) => { - tracing::warn!("can't load tokenizer for preview: {}", e); - return Err(ScratchError::new(StatusCode::BAD_REQUEST, format!("can't load tokenizer for preview: {}", e)))?; + tracing::error!(e); + return Err(ScratchError::new(StatusCode::BAD_REQUEST, e)); } }; - let ccx: Arc> = Arc::new(AMutex::new(AtCommandsContext::new( + let ccx = Arc::new(AMutex::new(AtCommandsContext::new( global_context.clone(), - recommended_model_record.n_ctx, + model_rec.base.n_ctx, crate::http::routers::v1::chat::CHAT_TOP_N, true, vec![], "".to_string(), false, - model_name.clone(), + model_rec.base.id.clone(), ).await)); let 
(messages_for_postprocessing, vec_highlights) = execute_at_commands_in_query( @@ -208,7 +197,7 @@ pub async fn handle_v1_command_preview( &mut query ).await; - let rag_n_ctx = max_tokens_for_rag_chat(recommended_model_record.n_ctx, 512); // real maxgen may be different -- comes from request + let rag_n_ctx = max_tokens_for_rag_chat(model_rec.base.n_ctx, 512); // real maxgen may be different -- comes from request let mut preview: Vec = vec![]; for exec_result in messages_for_postprocessing.iter() { @@ -277,8 +266,8 @@ pub async fn handle_v1_command_preview( Ok(Response::builder() .status(StatusCode::OK) .body(Body::from(serde_json::to_string_pretty( - &json!({"messages": preview, "model": model_name, "highlight": highlights, - "current_context": tokens_number, "number_context": recommended_model_record.n_ctx}) + &json!({"messages": preview, "model": model_rec.base.id, "highlight": highlights, + "current_context": tokens_number, "number_context": model_rec.base.n_ctx}) ).unwrap())) .unwrap()) } @@ -291,8 +280,11 @@ pub async fn handle_v1_at_command_execute( .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("JSON problem: {}", e)))?; let caps = try_load_caps_quickly_if_not_present(global_context.clone(), 0).await?; - let tokenizer = cached_tokenizers::cached_tokenizer(caps, global_context.clone(), post.model_name.clone()).await - .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error loading tokenizer: {}", e)))?; + let model_rec = resolve_chat_model(caps, &post.model_name) + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; + + let tokenizer = tokens::cached_tokenizer(global_context.clone(), &model_rec.base).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; let mut ccx = AtCommandsContext::new( global_context.clone(), @@ -302,7 +294,7 @@ pub async fn handle_v1_at_command_execute( vec![], "".to_string(), false, - post.model_name.clone(), + model_rec.base.id.clone(), ).await; ccx.subchat_tool_parameters = post.subchat_tool_parameters.clone(); ccx.postprocess_parameters = post.postprocess_parameters.clone(); diff --git a/refact-agent/engine/src/http/routers/v1/at_tools.rs b/refact-agent/engine/src/http/routers/v1/at_tools.rs index 168b297f2..7dd3050ad 100644 --- a/refact-agent/engine/src/http/routers/v1/at_tools.rs +++ b/refact-agent/engine/src/http/routers/v1/at_tools.rs @@ -9,8 +9,8 @@ use serde_json::Value; use tokio::sync::{Mutex as AMutex, RwLock as ARwLock}; use crate::at_commands::at_commands::AtCommandsContext; -use crate::cached_tokenizers; use crate::call_validation::{ChatMessage, ChatMeta, ChatToolCall, PostprocessSettings, SubchatParameters}; +use crate::caps::resolve_chat_model; use crate::http::http_post_json; use crate::http::routers::v1::chat::CHAT_TOP_N; use crate::integrations::docker::docker_container_manager::docker_container_get_host_lsp_port_to_connect; @@ -226,8 +226,10 @@ pub async fn handle_v1_tools_execute( .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("JSON problem: {}", e)))?; let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?; - let tokenizer = cached_tokenizers::cached_tokenizer(caps, gcx.clone(), tools_execute_post.model_name.clone()).await - .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error loading tokenizer: {}", e)))?; + let model_rec = resolve_chat_model(caps, &tools_execute_post.model_name) + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; + let 
tokenizer = crate::tokens::cached_tokenizer(gcx.clone(), &model_rec.base).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; let mut ccx = AtCommandsContext::new( gcx.clone(), @@ -237,7 +239,7 @@ pub async fn handle_v1_tools_execute( tools_execute_post.messages.clone(), tools_execute_post.chat_id.clone(), false, - tools_execute_post.model_name.clone(), + model_rec.base.id.clone(), ).await; ccx.subchat_tool_parameters = tools_execute_post.subchat_tool_parameters.clone(); ccx.postprocess_parameters = tools_execute_post.postprocess_parameters.clone(); @@ -246,7 +248,7 @@ pub async fn handle_v1_tools_execute( let mut at_tools = tools_merged_and_filtered(gcx.clone(), false).await.map_err(|e|{ ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error getting at_tools: {}", e)) })?; - let (messages, tools_ran) = run_tools( // todo: fix typo "runned" + let (messages, tools_ran) = run_tools( ccx_arc.clone(), &mut at_tools, tokenizer.clone(), tools_execute_post.maxgen, &tools_execute_post.messages, &tools_execute_post.style ).await.map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error running tools: {}", e)))?; diff --git a/refact-agent/engine/src/http/routers/v1/caps.rs b/refact-agent/engine/src/http/routers/v1/caps.rs index b3d46b4f9..2c84a3af9 100644 --- a/refact-agent/engine/src/http/routers/v1/caps.rs +++ b/refact-agent/engine/src/http/routers/v1/caps.rs @@ -33,8 +33,7 @@ pub async fn handle_v1_caps( )); } }; - let caps_locked = caps_arc.read().unwrap(); - let body = serde_json::to_string_pretty(&*caps_locked).unwrap(); + let body = serde_json::to_string_pretty(&*caps_arc).unwrap(); let response = Response::builder() .header("Content-Type", "application/json") .body(Body::from(body)) diff --git a/refact-agent/engine/src/http/routers/v1/chat.rs b/refact-agent/engine/src/http/routers/v1/chat.rs index 217351e8c..db61cfbaa 100644 --- a/refact-agent/engine/src/http/routers/v1/chat.rs +++ b/refact-agent/engine/src/http/routers/v1/chat.rs @@ -1,5 +1,4 @@ use std::sync::Arc; -use std::sync::RwLock as StdRwLock; use tokio::sync::Mutex as AMutex; use tokio::sync::RwLock as ARwLock; @@ -9,11 +8,11 @@ use hyper::{Body, Response, StatusCode}; use serde_json::Value; use crate::call_validation::{ChatContent, ChatMessage, ChatPost, ChatMode}; -use crate::caps::CodeAssistantCaps; +use crate::caps::resolve_chat_model; use crate::custom_error::ScratchError; use crate::at_commands::at_commands::AtCommandsContext; use crate::git::checkpoints::create_workspace_checkpoint; -use crate::global_context::{is_metadata_supported, GlobalContext, SharedGlobalContext}; +use crate::global_context::{GlobalContext, SharedGlobalContext}; use crate::integrations::docker::docker_container_manager::docker_container_check_status_or_start; @@ -56,33 +55,6 @@ pub fn available_tools_by_chat_mode(current_tools: Vec, chat_mode: &ChatM pub const CHAT_TOP_N: usize = 12; -pub async fn lookup_chat_scratchpad( - caps: Arc>, - chat_post: &ChatPost, -) -> Result<(String, String, serde_json::Value, usize, bool, bool, bool), String> { - let caps_locked = caps.read().unwrap(); - let (model_name, recommended_model_record) = - crate::caps::which_model_to_use( - &caps_locked.code_chat_models, - &chat_post.model, - &caps_locked.code_chat_default_model, - )?; - let (sname, patch) = crate::caps::which_scratchpad_to_use( - &recommended_model_record.supports_scratchpads, - &chat_post.scratchpad, - &recommended_model_record.default_scratchpad, - )?; - Ok(( - model_name, - sname.clone(), - 
patch.clone(), - recommended_model_record.n_ctx, - recommended_model_record.supports_tools, - recommended_model_record.supports_multimodality, - recommended_model_record.supports_clicks, - )) -} - pub async fn handle_v1_chat_completions( // standard openai-style handler Extension(gcx): Extension, @@ -110,7 +82,7 @@ pub fn deserialize_messages_from_post(messages: &Vec) -> Resu Ok(messages) } -fn fill_sampling_params(chat_post: &mut ChatPost, n_ctx: usize, model_name: &String) { +fn fill_sampling_params(chat_post: &mut ChatPost, n_ctx: usize, model_id: &str) { let mut max_tokens = if chat_post.increase_max_tokens { chat_post.max_tokens.unwrap_or(16384) } else { @@ -121,7 +93,7 @@ fn fill_sampling_params(chat_post: &mut ChatPost, n_ctx: usize, model_name: &Str if chat_post.parameters.max_new_tokens == 0 { chat_post.parameters.max_new_tokens = max_tokens; } - chat_post.model = model_name.clone(); + chat_post.model = model_id.to_string(); chat_post.parameters.n = chat_post.n; chat_post.parameters.temperature = Some(chat_post.parameters.temperature.unwrap_or(chat_post.temperature.unwrap_or(0.0))); } @@ -163,21 +135,17 @@ async fn _chat( } let caps = crate::global_context::try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?; - let (model_name, scratchpad_name, scratchpad_patch, n_ctx, supports_tools, supports_multimodality, supports_clicks) = lookup_chat_scratchpad( - caps.clone(), - &chat_post, - ).await.map_err(|e| { - ScratchError::new(StatusCode::BAD_REQUEST, format!("{}", e)) - })?; - fill_sampling_params(&mut chat_post, n_ctx, &model_name); + let model_rec = resolve_chat_model(caps, &chat_post.model) + .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, e.to_string()))?; + fill_sampling_params(&mut chat_post, model_rec.base.n_ctx, &model_rec.base.id); // extra validation to catch {"query": "Frog", "scope": "workspace"}{"query": "Toad", "scope": "workspace"} let re = regex::Regex::new(r"\{.*?\}").unwrap(); for message in messages.iter_mut() { - if !supports_multimodality { + if !model_rec.supports_multimodality { if let ChatContent::Multimodal(content) = &message.content { if content.iter().any(|el| el.is_image()) { - return Err(ScratchError::new(StatusCode::BAD_REQUEST, format!("model '{}' does not support multimodality", model_name))); + return Err(ScratchError::new(StatusCode::BAD_REQUEST, format!("model '{}' does not support multimodality", model_rec.base.id))); } } message.content = ChatContent::SimpleText(message.content.content_text_only()); @@ -221,12 +189,10 @@ async fn _chat( .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; } - let meta = { - if is_metadata_supported(gcx.clone()).await { - Some(chat_post.meta.clone()) - } else { - None - } + let meta = if model_rec.base.support_metadata { + Some(chat_post.meta.clone()) + } else { + None }; if chat_post.checkpoints_enabled { @@ -253,16 +219,11 @@ async fn _chat( // chat_post.stream = Some(false); // for debugging 400 errors that are hard to debug with streaming (because "data: " is not present and the error message is ignored by the library) let mut scratchpad = crate::scratchpads::create_chat_scratchpad( gcx.clone(), - caps, - model_name.clone(), &mut chat_post, &messages, true, - &scratchpad_name, - &scratchpad_patch, + &model_rec, allow_at, - supports_tools, - supports_clicks, ).await.map_err(|e| ScratchError::new(StatusCode::BAD_REQUEST, e) )?; @@ -281,13 +242,13 @@ async fn _chat( // } let mut ccx = AtCommandsContext::new( gcx.clone(), - n_ctx, + model_rec.base.n_ctx, CHAT_TOP_N, 
false, messages.clone(), chat_post.meta.chat_id.clone(), should_execute_remotely, - model_name.clone(), + model_rec.base.id.clone(), ).await; ccx.subchat_tool_parameters = chat_post.subchat_tool_parameters.clone(); ccx.postprocess_parameters = chat_post.postprocess_parameters.clone(); @@ -298,7 +259,7 @@ async fn _chat( ccx_arc.clone(), &mut scratchpad, "chat".to_string(), - model_name, + &model_rec.base, &mut chat_post.parameters, chat_post.only_deterministic_messages, meta @@ -308,7 +269,7 @@ async fn _chat( ccx_arc.clone(), scratchpad, "chat-stream".to_string(), - model_name, + model_rec.base.clone(), chat_post.parameters.clone(), chat_post.only_deterministic_messages, meta diff --git a/refact-agent/engine/src/http/routers/v1/code_completion.rs b/refact-agent/engine/src/http/routers/v1/code_completion.rs index 281fabe51..af6aace0f 100644 --- a/refact-agent/engine/src/http/routers/v1/code_completion.rs +++ b/refact-agent/engine/src/http/routers/v1/code_completion.rs @@ -1,5 +1,4 @@ use std::sync::Arc; -use std::sync::RwLock as StdRwLock; use tokio::sync::RwLock as ARwLock; use tokio::sync::Mutex as AMutex; @@ -8,8 +7,7 @@ use axum::response::Result; use hyper::{Body, Response, StatusCode}; use tracing::info; use crate::call_validation::{CodeCompletionPost, code_completion_post_validate}; -use crate::caps; -use crate::caps::CodeAssistantCaps; +use crate::caps::resolve_completion_model; use crate::completion_cache; use crate::custom_error::ScratchError; use crate::global_context::GlobalContext; @@ -21,41 +19,6 @@ use crate::at_commands::at_commands::AtCommandsContext; const CODE_COMPLETION_TOP_N: usize = 5; -async fn _lookup_code_completion_scratchpad( - caps: Arc>, - code_completion_post: &CodeCompletionPost, - look_for_multiline_model: bool, -) -> Result<(String, String, serde_json::Value, usize), String> { - let caps_locked = caps.read().unwrap(); - - let (model_name, modelrec) = if !look_for_multiline_model - || caps_locked.multiline_code_completion_default_model.is_empty() { - caps::which_model_to_use( - &caps_locked.code_completion_models, - &code_completion_post.model, - &caps_locked.code_completion_default_model, - )? - } else { - caps::which_model_to_use( - &caps_locked.code_completion_models, - &code_completion_post.model, - &caps_locked.multiline_code_completion_default_model, - )? - }; - let (sname, patch) = caps::which_scratchpad_to_use( - &modelrec.supports_scratchpads, - &code_completion_post.scratchpad, - &modelrec.default_scratchpad, - )?; - let caps_completion_n_ctx = caps_locked.code_completion_n_ctx; - let mut n_ctx = modelrec.n_ctx; - if caps_completion_n_ctx > 0 && n_ctx > caps_completion_n_ctx { - // the model might be capable of a bigger context, but server (i.e. 
admin) tells us to use smaller (for example because latency) - n_ctx = caps_completion_n_ctx; - } - Ok((model_name, sname.clone(), patch.clone(), n_ctx)) -} - pub async fn handle_v1_code_completion( gcx: Arc>, code_completion_post: &mut CodeCompletionPost, @@ -67,27 +30,15 @@ pub async fn handle_v1_code_completion( .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, e))?; let caps = crate::global_context::try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?; - let maybe = _lookup_code_completion_scratchpad( - caps.clone(), - &code_completion_post, - code_completion_post.inputs.multiline - ).await; - if maybe.is_err() { - // On error, this will also invalidate caps each 10 seconds, allows to overcome empty caps situation - let _ = crate::global_context::try_load_caps_quickly_if_not_present(gcx.clone(), 10).await; - return Err(ScratchError::new(StatusCode::BAD_REQUEST, format!("{}", maybe.unwrap_err()))) - } - let (model_name, scratchpad_name, scratchpad_patch, n_ctx) = maybe.unwrap(); + let model_rec = resolve_completion_model(caps, &code_completion_post.model, true) + .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, e.to_string()))?; if code_completion_post.parameters.max_new_tokens == 0 { code_completion_post.parameters.max_new_tokens = 50; } if code_completion_post.model == "" { - code_completion_post.model = model_name.clone(); - } - if code_completion_post.scratchpad == "" { - code_completion_post.scratchpad = scratchpad_name.clone(); + code_completion_post.model = model_rec.base.id.clone(); } - info!("chosen completion model: {}, scratchpad: {}", code_completion_post.model, code_completion_post.scratchpad); + info!("chosen completion model: {}, scratchpad: {}", code_completion_post.model, model_rec.scratchpad); code_completion_post.parameters.temperature = Some(code_completion_post.parameters.temperature.unwrap_or(0.2)); let (cache_arc, tele_storage) = { let gcx_locked = gcx.write().await; @@ -109,31 +60,26 @@ pub async fn handle_v1_code_completion( let ast_service_opt = gcx.read().await.ast_service.clone(); let mut scratchpad = scratchpads::create_code_completion_scratchpad( gcx.clone(), - caps, - model_name.clone(), + &model_rec, &code_completion_post.clone(), - &scratchpad_name, - &scratchpad_patch, cache_arc.clone(), tele_storage.clone(), ast_service_opt - ).await.map_err(|e| - ScratchError::new(StatusCode::BAD_REQUEST, e) - )?; - let ccx: Arc> = Arc::new(AMutex::new(AtCommandsContext::new( + ).await.map_err(|e| ScratchError::new(StatusCode::BAD_REQUEST, e))?; + let ccx = Arc::new(AMutex::new(AtCommandsContext::new( gcx.clone(), - n_ctx, + model_rec.base.n_ctx, CODE_COMPLETION_TOP_N, true, vec![], "".to_string(), false, - code_completion_post.model.clone(), + model_rec.base.id.clone(), ).await)); if !code_completion_post.stream { - crate::restream::scratchpad_interaction_not_stream(ccx.clone(), &mut scratchpad, "completion".to_string(), model_name, &mut code_completion_post.parameters, false, None).await + crate::restream::scratchpad_interaction_not_stream(ccx.clone(), &mut scratchpad, "completion".to_string(), &model_rec.base, &mut code_completion_post.parameters, false, None).await } else { - crate::restream::scratchpad_interaction_stream(ccx.clone(), scratchpad, "completion-stream".to_string(), model_name, code_completion_post.parameters.clone(), false, None).await + crate::restream::scratchpad_interaction_stream(ccx.clone(), scratchpad, "completion-stream".to_string(), model_rec.base.clone(), code_completion_post.parameters.clone(), false, 
None).await } } @@ -161,11 +107,8 @@ pub async fn handle_v1_code_completion_prompt( .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, e))?; let caps = crate::global_context::try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?; - let maybe = _lookup_code_completion_scratchpad(caps.clone(), &post, post.inputs.multiline).await; - if maybe.is_err() { - return Err(ScratchError::new(StatusCode::BAD_REQUEST, format!("{}", maybe.unwrap_err()))) - } - let (model_name, scratchpad_name, scratchpad_patch, n_ctx) = maybe.unwrap(); + let model_rec = resolve_completion_model(caps, &post.model, true) + .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, e.to_string()))?; // don't need cache, but go along let (cache_arc, tele_storage) = { @@ -176,11 +119,8 @@ pub async fn handle_v1_code_completion_prompt( let ast_service_opt = gcx.read().await.ast_service.clone(); let mut scratchpad = scratchpads::create_code_completion_scratchpad( gcx.clone(), - caps, - model_name.clone(), + &model_rec, &post, - &scratchpad_name, - &scratchpad_patch, cache_arc.clone(), tele_storage.clone(), ast_service_opt @@ -188,15 +128,15 @@ pub async fn handle_v1_code_completion_prompt( ScratchError::new(StatusCode::BAD_REQUEST, e) )?; - let ccx: Arc> = Arc::new(AMutex::new(AtCommandsContext::new( + let ccx = Arc::new(AMutex::new(AtCommandsContext::new( gcx.clone(), - n_ctx, + model_rec.base.n_ctx, CODE_COMPLETION_TOP_N, true, vec![], "".to_string(), false, - model_name.clone(), + model_rec.base.id.clone(), ).await)); let prompt = scratchpad.prompt(ccx.clone(), &mut post.parameters).await.map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Prompt: {}", e)) diff --git a/refact-agent/engine/src/http/routers/v1/customization.rs b/refact-agent/engine/src/http/routers/v1/customization.rs index 23951ce37..11b436f3e 100644 --- a/refact-agent/engine/src/http/routers/v1/customization.rs +++ b/refact-agent/engine/src/http/routers/v1/customization.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use tokio::sync::RwLock as ARwLock; use crate::global_context::GlobalContext; -use crate::custom_error::ScratchError; +use crate::custom_error::{ScratchError, YamlError}; use crate::yaml_configs::customization_loader::load_customization; @@ -24,7 +24,7 @@ pub async fn handle_v1_customization( Extension(global_context): Extension>>, _body_bytes: hyper::body::Bytes, ) -> Result, ScratchError> { - let mut error_log: Vec = Vec::new(); + let mut error_log: Vec = Vec::new(); let tconfig = load_customization(global_context.clone(), false, &mut error_log).await; let mut response_body = serde_json::to_value(tconfig).unwrap(); diff --git a/refact-agent/engine/src/http/routers/v1/dashboard.rs b/refact-agent/engine/src/http/routers/v1/dashboard.rs index 96b9c08ba..526e28502 100644 --- a/refact-agent/engine/src/http/routers/v1/dashboard.rs +++ b/refact-agent/engine/src/http/routers/v1/dashboard.rs @@ -67,7 +67,7 @@ pub async fn get_dashboard_plots( let caps = crate::global_context::try_load_caps_quickly_if_not_present(global_context.clone(), 0).await?; let (http_client, api_key, url) = { let gcx_locked = global_context.read().await; - (gcx_locked.http_client.clone(), gcx_locked.cmdline.api_key.clone(), caps.read().unwrap().telemetry_basic_retrieve_my_own.clone()) + (gcx_locked.http_client.clone(), gcx_locked.cmdline.api_key.clone(), caps.telemetry_basic_retrieve_my_own.clone()) }; if url.is_empty() { return Err(ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, "Error: no url provided from caps".to_string())); diff 
--git a/refact-agent/engine/src/http/routers/v1/links.rs b/refact-agent/engine/src/http/routers/v1/links.rs index be7c6f0e7..8ed788502 100644 --- a/refact-agent/engine/src/http/routers/v1/links.rs +++ b/refact-agent/engine/src/http/routers/v1/links.rs @@ -6,17 +6,14 @@ use serde::{Deserialize, Serialize}; use tokio::sync::RwLock as ARwLock; use crate::call_validation::{ChatMessage, ChatMeta, ChatMode}; +use crate::caps::resolve_chat_model; use crate::custom_error::ScratchError; -use crate::global_context::GlobalContext; +use crate::global_context::{try_load_caps_quickly_if_not_present, GlobalContext}; use crate::integrations::go_to_configuration_message; use crate::agentic::generate_follow_up_message::generate_follow_up_message; use crate::git::commit_info::{get_commit_information_from_current_changes, generate_commit_messages}; // use crate::http::routers::v1::git::GitCommitPost; - -// TODO: remove this dirty hack when we add light_chat_model in caps -const LIGHT_MODEL_NAME: &str = "gpt-4o-mini"; - #[derive(Deserialize, Clone, Debug)] pub struct LinksPost { messages: Vec, @@ -225,8 +222,8 @@ pub async fn handle_v1_links( for e in integration_yaml_errors { links.push(Link { link_action: LinkAction::Goto, - link_text: format!("Syntax error in {}", crate::nicer_logs::last_n_chars(&e.integr_config_path, 20)), - link_goto: Some(format!("SETTINGS:{}", e.integr_config_path)), + link_text: format!("Syntax error in {}", crate::nicer_logs::last_n_chars(&e.path, 20)), + link_goto: Some(format!("SETTINGS:{}", e.path)), link_summary_path: None, link_tooltip: format!("Error at line {}: {}", e.error_line, e.error_msg), ..Default::default() @@ -347,8 +344,13 @@ pub async fn handle_v1_links( && post.messages.len() > 2 && post.messages.last().map(|x| x.role == "assistant").unwrap_or(false) { + let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?; + let model_id = match resolve_chat_model(caps.clone(), &caps.defaults.chat_light_model) { + Ok(light_model) => light_model.base.id.clone(), + Err(_) => post.model_name.clone(), + }; let follow_up_response = generate_follow_up_message( - post.messages.clone(), gcx.clone(), LIGHT_MODEL_NAME.to_string(), &post.model_name, &post.meta.chat_id + post.messages.clone(), gcx.clone(), &model_id, &post.meta.chat_id ).await .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error generating follow-up message: {}", e)))?; new_chat_suggestion = follow_up_response.topic_changed; diff --git a/refact-agent/engine/src/http/routers/v1/providers.rs b/refact-agent/engine/src/http/routers/v1/providers.rs new file mode 100644 index 000000000..a8741aa6f --- /dev/null +++ b/refact-agent/engine/src/http/routers/v1/providers.rs @@ -0,0 +1,715 @@ +use axum::extract::Query; +use axum::Extension; +use axum::http::{Response, StatusCode}; +use hyper::Body; +use std::path::Path; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use std::sync::Arc; +use tokio::sync::RwLock as ARwLock; + +use crate::call_validation::ModelType; +use crate::caps::{ChatModelRecord, CompletionModelFamily, CompletionModelRecord, EmbeddingModelRecord, HasBaseModelRecord}; +use crate::custom_error::{MapErrToString, ScratchError}; +use crate::global_context::{try_load_caps_quickly_if_not_present, GlobalContext}; +use crate::caps::providers::{get_known_models, get_provider_from_server, get_provider_from_template_and_config_file, get_provider_model_default_settings_ui, get_provider_templates, read_providers_d, CapsProvider}; + +#[derive(Serialize, Deserialize, Debug)] 
+pub struct ProviderDTO {
+    name: String,
+    endpoint_style: String,
+    chat_endpoint: String,
+    completion_endpoint: String,
+    embedding_endpoint: String,
+    api_key: String,
+    #[serde(default)]
+    tokenizer_api_key: String,
+
+    chat_default_model: String,
+    chat_thinking_model: String,
+    chat_light_model: String,
+
+    enabled: bool,
+    #[serde(default)]
+    readonly: bool,
+    #[serde(default = "default_true")]
+    supports_completion: bool,
+}
+
+fn default_true() -> bool { true }
+
+impl ProviderDTO {
+    pub fn from_caps_provider(provider: CapsProvider, readonly: bool) -> Self {
+        ProviderDTO {
+            name: provider.name,
+            endpoint_style: provider.endpoint_style,
+            chat_endpoint: provider.chat_endpoint,
+            completion_endpoint: if provider.supports_completion { provider.completion_endpoint } else { String::new() },
+            embedding_endpoint: provider.embedding_endpoint,
+            api_key: provider.api_key,
+            tokenizer_api_key: provider.tokenizer_api_key,
+            chat_default_model: provider.defaults.chat_default_model,
+            chat_light_model: provider.defaults.chat_light_model,
+            chat_thinking_model: provider.defaults.chat_thinking_model,
+            enabled: provider.enabled,
+            readonly,
+            supports_completion: provider.supports_completion,
+        }
+    }
+}
+
+#[derive(Serialize, Debug)]
+pub struct ModelLightResponse {
+    name: String,
+    enabled: bool,
+    removable: bool,
+    user_configured: bool,
+}
+
+impl ModelLightResponse {
+    pub fn new<T: HasBaseModelRecord>(model: T) -> Self {
+        ModelLightResponse {
+            name: model.base().name.clone(),
+            enabled: model.base().enabled,
+            removable: model.base().removable,
+            user_configured: model.base().user_configured,
+        }
+    }
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct ChatModelDTO {
+    n_ctx: usize,
+    name: String,
+    tokenizer: String,
+    enabled: bool,
+
+    supports_tools: bool,
+    supports_multimodality: bool,
+    supports_clicks: bool,
+    supports_agent: bool,
+    supports_reasoning: Option<String>,
+    supports_boost_reasoning: bool,
+    default_temperature: Option<f32>,
+
+    #[serde(skip_deserializing, rename = "type", default = "model_type_chat")]
+    model_type: ModelType,
+}
+
+fn model_type_chat() -> ModelType { ModelType::Chat }
+
+impl ChatModelDTO {
+    pub fn new(chat_model: ChatModelRecord) -> Self {
+        ChatModelDTO {
+            n_ctx: chat_model.base.n_ctx,
+            name: chat_model.base.name,
+            tokenizer: chat_model.base.tokenizer,
+            enabled: chat_model.base.enabled,
+            supports_tools: chat_model.supports_tools,
+            supports_multimodality: chat_model.supports_multimodality,
+            supports_clicks: chat_model.supports_clicks,
+            supports_agent: chat_model.supports_agent,
+            supports_reasoning: chat_model.supports_reasoning,
+            supports_boost_reasoning: chat_model.supports_boost_reasoning,
+            default_temperature: chat_model.default_temperature,
+            model_type: ModelType::Chat,
+        }
+    }
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct CompletionModelDTO {
+    n_ctx: usize,
+    name: String,
+    enabled: bool,
+    model_family: Option<CompletionModelFamily>,
+    #[serde(skip_deserializing, rename = "type", default = "model_type_completion")]
+    model_type: ModelType,
+}
+
+fn model_type_completion() -> ModelType { ModelType::Completion }
+
+impl CompletionModelDTO {
+    pub fn new(completion_model: CompletionModelRecord) -> Self {
+        CompletionModelDTO {
+            n_ctx: completion_model.base.n_ctx,
+            name: completion_model.base.name,
+            enabled: completion_model.base.enabled,
+            model_family: completion_model.model_family,
+            model_type: ModelType::Completion,
+        }
+    }
+}
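+
+// Sketch of the providers.d YAML fragment this DTO round-trips through handle_v1_post_model
+// below; the keys are exactly the ones written there:
+//   embedding_model: {n_ctx, name, tokenizer, enabled, embedding_size, rejection_threshold, embedding_batch}
+#[derive(Serialize, Deserialize, Debug)]
+pub struct EmbeddingModelDTO {
+    n_ctx: usize,
+    name: String,
+    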
tokenizer: String, + enabled: bool, + + embedding_size: i32, + rejection_threshold: f32, + embedding_batch: usize, + + #[serde(skip_deserializing, rename = "type", default = "model_type_embedding")] + model_type: ModelType, +} + +fn model_type_embedding() -> ModelType { ModelType::Embedding } + +impl EmbeddingModelDTO { + pub fn new(embedding_model: EmbeddingModelRecord) -> Self { + EmbeddingModelDTO { + n_ctx: embedding_model.base.n_ctx, + name: embedding_model.base.name, + tokenizer: embedding_model.base.tokenizer, + enabled: embedding_model.base.enabled, + embedding_size: embedding_model.embedding_size, + rejection_threshold: embedding_model.rejection_threshold, + embedding_batch: embedding_model.embedding_batch, + model_type: ModelType::Embedding, + } + } +} + +pub async fn handle_v1_providers( + Extension(gcx): Extension>>, +) -> Response { + let config_dir = { + let gcx_locked = gcx.read().await; + gcx_locked.config_dir.clone() + }; + + let template_names = get_provider_templates().keys().collect::>(); + let (providers, read_errors) = read_providers_d(Vec::new(), &config_dir).await; + + let mut result = providers.into_iter() + .filter(|p| template_names.contains(&&p.name)) + .map(|p| json!({ + "name": p.name, + "enabled": p.enabled, + "readonly": false, + "supports_completion": p.supports_completion + })) + .collect::>(); + + match crate::global_context::try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { + Ok(caps) => { + if !caps.cloud_name.is_empty() { + result.retain(|p| p["name"] != caps.cloud_name); + result.insert(0, json!({ + "name": caps.cloud_name.clone(), + "enabled": true, + "readonly": true, + "supports_completion": true + })); + } + }, + Err(e) => { + tracing::error!("Failed to load caps, server provider will not be included: {}", e); + } + } + + Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(serde_json::to_string(&json!({ + "providers": result, + "error_log": read_errors + })).unwrap())) + .unwrap() +} + +pub async fn handle_v1_provider_templates() -> Response { + let provider_templates = get_provider_templates(); + + let result = provider_templates.keys().map(|name| { json!({ + "name": name + })}).collect::>(); + + Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(serde_json::to_string(&json!({ + "provider_templates": result + })).unwrap())) + .unwrap() +} + +#[derive(Deserialize)] +pub struct ProviderQueryParams { + #[serde(rename = "provider-name")] + provider_name: String, +} + +pub async fn handle_v1_get_provider( + Extension(gcx): Extension>>, + Query(params): Query, +) -> Result, ScratchError> { + let use_server_provider = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { + Ok(caps) => !caps.cloud_name.is_empty() && caps.cloud_name == params.provider_name, + Err(e) => { + tracing::error!("Failed to load caps: {}", e); + false + } + }; + + let provider_dto = if use_server_provider { + let provider = get_provider_from_server(gcx.clone()).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; + ProviderDTO::from_caps_provider(provider, true) + } else { + let config_dir = gcx.read().await.config_dir.clone(); + let provider = get_provider_from_template_and_config_file(&config_dir, ¶ms.provider_name, false, true).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; + ProviderDTO::from_caps_provider(provider, false) + }; + + Ok(Response::builder() + 
.status(StatusCode::OK)
+        .header("Content-Type", "application/json")
+        .body(Body::from(serde_json::to_string(&provider_dto).unwrap()))
+        .unwrap())
+}
+
+pub async fn handle_v1_post_provider(
+    Extension(gcx): Extension<Arc<ARwLock<GlobalContext>>>,
+    body_bytes: hyper::body::Bytes,
+) -> Result<Response<Body>, ScratchError> {
+    let provider_dto = serde_json::from_slice::<ProviderDTO>(&body_bytes)
+        .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("Error parsing provider: {}", e)))?;
+
+    let config_dir = gcx.read().await.config_dir.clone();
+    let provider_path = config_dir.join("providers.d").join(format!("{}.yaml", provider_dto.name));
+
+    let provider_template = get_provider_templates().get(&provider_dto.name).cloned()
+        .ok_or(ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, "Provider template not found".to_string()))?;
+
+    let mut file_value = read_yaml_file_as_value_if_exists(&provider_path).await
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?;
+
+    update_yaml_field_if_needed(&mut file_value, "endpoint_style",
+        provider_dto.endpoint_style, provider_template.endpoint_style);
+    update_yaml_field_if_needed(&mut file_value, "api_key",
+        provider_dto.api_key, provider_template.api_key);
+    update_yaml_field_if_needed(&mut file_value, "tokenizer_api_key",
+        provider_dto.tokenizer_api_key, provider_template.tokenizer_api_key);
+    update_yaml_field_if_needed(&mut file_value, "chat_endpoint",
+        provider_dto.chat_endpoint, provider_template.chat_endpoint);
+    update_yaml_field_if_needed(&mut file_value, "completion_endpoint",
+        provider_dto.completion_endpoint, provider_template.completion_endpoint);
+    update_yaml_field_if_needed(&mut file_value, "embedding_endpoint",
+        provider_dto.embedding_endpoint, provider_template.embedding_endpoint);
+    update_yaml_field_if_needed(&mut file_value, "chat_default_model",
+        provider_dto.chat_default_model, provider_template.defaults.chat_default_model);
+    update_yaml_field_if_needed(&mut file_value, "chat_light_model",
+        provider_dto.chat_light_model, provider_template.defaults.chat_light_model);
+    update_yaml_field_if_needed(&mut file_value, "chat_thinking_model",
+        provider_dto.chat_thinking_model, provider_template.defaults.chat_thinking_model);
+    file_value["enabled"] = serde_yaml::Value::Bool(provider_dto.enabled);
+
+    let file_content = serde_yaml::to_string(&file_value)
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error parsing provider file: {}", e)))?;
+    tokio::fs::write(&provider_path, file_content.as_bytes()).await
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error writing provider file: {}", e)))?;
+
+    Ok(Response::builder()
+        .status(StatusCode::OK)
+        .header("Content-Type", "application/json")
+        .body(Body::from(json!({ "success": true }).to_string()))
+        .unwrap())
+}
+
+/// Writes the field only if the user's file already sets it or the value differs from the
+/// template default, which keeps providers.d files minimal.
+fn update_yaml_field_if_needed(
+    file_value: &mut serde_yaml::Value,
+    field_name: &str,
+    dto_value: String,
+    template_value: String,
+) {
+    if file_value.get(field_name).is_some() || dto_value != template_value {
+        file_value[field_name] = serde_yaml::Value::String(dto_value);
+    }
+}
+
+async fn read_yaml_file_as_value_if_exists(path: &Path) -> Result<serde_yaml::Value, String> {
+    match tokio::fs::read_to_string(path).await {
+        Ok(content) => {
+            serde_yaml::from_str::<serde_yaml::Value>(&content)
+                .map_err_with_prefix("Error parsing file:")
+        },
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
+            Ok(serde_yaml::Value::Mapping(serde_yaml::Mapping::new()))
+        },
+        Err(e) => {
+            Err(format!("Error reading file: {e}"))
+        }
+    }
+}
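+
+/// Deletes providers.d/<provider-name>.yaml so the provider reverts to its template defaults;
+/// the provider that comes from the server cannot be deleted.
+pub async fn 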
handle_v1_delete_provider( + Extension(gcx): Extension>>, + Query(params): Query, +) -> Result, ScratchError> { + let use_server_provider = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { + Ok(caps) => !caps.cloud_name.is_empty() && caps.cloud_name == params.provider_name, + Err(e) => { + tracing::error!("Failed to load caps: {}", e); + false + } + }; + + if use_server_provider { + return Err(ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, + "Cannot delete server provider".to_string())); + } + + let config_dir = gcx.read().await.config_dir.clone(); + + if !get_provider_templates().contains_key(¶ms.provider_name) { + return Err(ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, + format!("Provider template '{}' not found", params.provider_name))); + } + + let provider_path = config_dir.join("providers.d") + .join(format!("{}.yaml", params.provider_name)); + + if !provider_path.exists() { + return Err(ScratchError::new(StatusCode::NOT_FOUND, + format!("Provider '{}' does not exist", params.provider_name))); + } + + tokio::fs::remove_file(&provider_path).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, + format!("Failed to delete provider file: {}", e)))?; + + Ok(Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(json!({ "success": true }).to_string())) + .unwrap()) +} + +pub async fn handle_v1_models( + Extension(gcx): Extension>>, + Query(params): Query, +) -> Result, ScratchError> { + let use_server_provider = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { + Ok(caps) => !caps.cloud_name.is_empty() && caps.cloud_name == params.provider_name, + Err(e) => { + tracing::error!("Failed to load caps: {}", e); + false + } + }; + + let provider = if use_server_provider { + get_provider_from_server(gcx.clone()).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))? + } else { + let config_dir = gcx.read().await.config_dir.clone(); + get_provider_from_template_and_config_file(&config_dir, ¶ms.provider_name, false, true).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))? 
+ }; + + let result = serde_json::json!({ + "chat_models": provider.chat_models.into_iter() + .map(|(_, model)| ModelLightResponse::new(model)).collect::>(), + "completion_models": if provider.supports_completion { + provider.completion_models.into_iter() + .map(|(_, model)| ModelLightResponse::new(model)).collect::>() + } else { + Vec::::new() + }, + "embedding_model": ModelLightResponse::new(provider.embedding_model), + }); + + Ok(Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(serde_json::to_string(&result).unwrap())) + .unwrap()) +} + +#[derive(Deserialize)] +pub struct ModelQueryParams { + model: Option, + provider: String, + #[serde(rename = "type")] + model_type: ModelType, +} + +#[derive(Deserialize)] +pub struct ModelDefaultQueryParams { + provider: String, + #[serde(rename = "type")] + model_type: ModelType, +} + +pub async fn handle_v1_get_model( + Extension(gcx): Extension>>, + Query(params): Query, +) -> Result, ScratchError> { + let use_server_provider = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { + Ok(caps) => !caps.cloud_name.is_empty() && caps.cloud_name == params.provider, + Err(e) => { + tracing::error!("Failed to load caps: {}", e); + false + } + }; + + let provider = if use_server_provider { + get_provider_from_server(gcx.clone()).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))? + } else { + let config_dir = gcx.read().await.config_dir.clone(); + get_provider_from_template_and_config_file(&config_dir, ¶ms.provider, false, true).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))? + }; + + let model = match params.model_type { + ModelType::Chat => { + let model_name = params.model.ok_or_else(|| ScratchError::new(StatusCode::BAD_REQUEST, "Missing `model` query parameter".to_string()))?; + let chat_model = provider.chat_models.get(&model_name).cloned() + .ok_or(ScratchError::new(StatusCode::NOT_FOUND, format!("Chat model {} not found for provider {}", model_name, params.provider)))?; + serde_json::json!(ChatModelDTO::new(chat_model)) + }, + ModelType::Completion => { + if !provider.supports_completion { + return Err(ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("Provider {} does not support completion", params.provider))); + } + let model_name = params.model.ok_or_else(|| ScratchError::new(StatusCode::BAD_REQUEST, "Missing `model` query parameter".to_string()))?; + let completion_model = provider.completion_models.get(&model_name).cloned() + .ok_or(ScratchError::new(StatusCode::NOT_FOUND, format!("Completion model {} not found for provider {}", model_name, params.provider)))?; + serde_json::json!(CompletionModelDTO::new(completion_model)) + }, + ModelType::Embedding => { + serde_json::json!(EmbeddingModelDTO::new(provider.embedding_model)) + }, + }; + + Ok(Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(serde_json::to_string(&model).unwrap())) + .unwrap()) +} + +#[derive(Deserialize)] +pub struct ModelPOST { + pub provider: String, + pub model: serde_json::Value, + #[serde(rename = "type")] + pub model_type: ModelType, +} + +pub async fn handle_v1_post_model( + Extension(gcx): Extension>>, + body_bytes: hyper::body::Bytes, +) -> Result, ScratchError> { + let post = serde_json::from_slice::(&body_bytes) + .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("Error parsing json: {}", e)))?; + + let config_dir = 
gcx.read().await.config_dir.clone();
+    let provider_path = config_dir.join("providers.d").join(format!("{}.yaml", post.provider));
+
+    let _provider_template = get_provider_templates().get(&post.provider)
+        .ok_or(ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, "Provider template not found".to_string()))?;
+
+    let mut file_value = read_yaml_file_as_value_if_exists(&provider_path).await
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?;
+
+    fn get_or_create_model_mapping(file_value: &mut serde_yaml::Value, models_key: &str, model_name: &str) -> serde_yaml::Mapping {
+        if file_value.get(models_key).is_none() {
+            file_value[models_key] = serde_yaml::Value::Mapping(serde_yaml::Mapping::new());
+        }
+
+        let model_entry = if file_value[models_key].get(model_name).is_some() {
+            file_value[models_key][model_name].clone()
+        } else {
+            serde_yaml::Value::Mapping(serde_yaml::Mapping::new())
+        };
+
+        model_entry.as_mapping().cloned().unwrap_or_default()
+    }
+
+    match post.model_type {
+        ModelType::Chat => {
+            let chat_model = serde_json::from_value::<ChatModelDTO>(post.model)
+                .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("Error parsing model: {}", e)))?;
+            let models_key = "chat_models";
+
+            let mut model_value = get_or_create_model_mapping(&mut file_value, models_key, &chat_model.name);
+
+            model_value.insert("n_ctx".into(), chat_model.n_ctx.into());
+            model_value.insert("tokenizer".into(), chat_model.tokenizer.into());
+            model_value.insert("enabled".into(), chat_model.enabled.into());
+
+            model_value.insert("supports_tools".into(), chat_model.supports_tools.into());
+            model_value.insert("supports_multimodality".into(), chat_model.supports_multimodality.into());
+            model_value.insert("supports_clicks".into(), chat_model.supports_clicks.into());
+            model_value.insert("supports_agent".into(), chat_model.supports_agent.into());
+            model_value.insert("supports_boost_reasoning".into(), chat_model.supports_boost_reasoning.into());
+
+            model_value.insert("supports_reasoning".into(),
+                match chat_model.supports_reasoning {
+                    Some(supports_reasoning) => supports_reasoning.into(),
+                    None => serde_yaml::Value::Null,
+                }
+            );
+            model_value.insert("default_temperature".into(),
+                match chat_model.default_temperature {
+                    Some(default_temperature) => serde_yaml::Value::Number(serde_yaml::Number::from(default_temperature as f64)),
+                    None => serde_yaml::Value::Null,
+                }
+            );
+
+            file_value[models_key][chat_model.name] = model_value.into();
+        },
+        ModelType::Completion => {
+            let completion_model = serde_json::from_value::<CompletionModelDTO>(post.model)
+                .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("Error parsing model: {}", e)))?;
+            let models_key = "completion_models";
+
+            let mut model_value = get_or_create_model_mapping(&mut file_value, models_key, &completion_model.name);
+
+            if let Some(model_family) = completion_model.model_family {
+                let family_model_rec = get_known_models().completion_models.get(&model_family.to_string())
+                    .unwrap_or_else(|| panic!("Model family {} not found in known models", model_family));
+
+                model_value.insert("model_family".into(), model_family.to_string().into());
+                model_value.insert("scratchpad".into(), family_model_rec.scratchpad.clone().into());
+                model_value.insert("scratchpad_patch".into(), serde_yaml::from_str(&family_model_rec.scratchpad_patch.to_string()).unwrap());
+                model_value.insert("tokenizer".into(), family_model_rec.base.tokenizer.clone().into());
+            }
+
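+            // The completion entry ends up under completion_models.<name> with the family-derived
+            // keys above (model_family, scratchpad, scratchpad_patch, tokenizer) plus n_ctx and enabled.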
+            model_value.insert("n_ctx".into(), completion_model.n_ctx.into());
+            model_value.insert("enabled".into(), completion_model.enabled.into());
+
+            file_value[models_key][completion_model.name] = model_value.into();
+        },
+        ModelType::Embedding => {
+            let embedding_model = serde_json::from_value::<EmbeddingModelDTO>(post.model)
+                .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("Error parsing model: {}", e)))?;
+            let mut model_value = serde_yaml::Mapping::new();
+
+            model_value.insert("n_ctx".into(), embedding_model.n_ctx.into());
+            model_value.insert("name".into(), embedding_model.name.clone().into());
+            model_value.insert("tokenizer".into(), embedding_model.tokenizer.into());
+            model_value.insert("enabled".into(), embedding_model.enabled.into());
+
+            model_value.insert("embedding_size".into(), embedding_model.embedding_size.into());
+            model_value.insert("rejection_threshold".into(), serde_yaml::Value::Number(serde_yaml::Number::from(embedding_model.rejection_threshold as f64)));
+            model_value.insert("embedding_batch".into(), embedding_model.embedding_batch.into());
+
+            file_value["embedding_model"] = model_value.into();
+        },
+    }
+
+    let file_content = serde_yaml::to_string(&file_value)
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error parsing provider file: {}", e)))?;
+    tokio::fs::write(&provider_path, file_content.as_bytes()).await
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error writing provider file: {}", e)))?;
+
+    Ok(Response::builder()
+        .status(StatusCode::OK)
+        .header("Content-Type", "application/json")
+        .body(Body::from(json!({ "success": true }).to_string()))
+        .unwrap())
+}
+
+pub async fn handle_v1_delete_model(
+    Extension(gcx): Extension<Arc<ARwLock<GlobalContext>>>,
+    Query(params): Query<ModelQueryParams>,
+) -> Result<Response<Body>, ScratchError> {
+    let config_dir = gcx.read().await.config_dir.clone();
+    let provider_path = config_dir.join("providers.d").join(format!("{}.yaml", params.provider));
+
+    let _provider_template = get_provider_templates().get(&params.provider)
+        .ok_or(ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, "Provider template not found".to_string()))?;
+
+    let mut file_value = read_yaml_file_as_value_if_exists(&provider_path).await
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?;
+
+    match params.model_type {
+        ModelType::Chat => {
+            let model_name = params.model.as_ref()
+                .ok_or_else(|| ScratchError::new(StatusCode::BAD_REQUEST, "Missing `model` query parameter".to_string()))?;
+            let models_key = "chat_models";
+
+            if file_value.get(models_key).is_none() || file_value[models_key].get(model_name).is_none() {
+                return Err(ScratchError::new(StatusCode::NOT_FOUND,
+                    format!("Chat model {} not found for provider {}", model_name, params.provider)));
+            }
+
+            if let Some(mapping) = file_value[models_key].as_mapping_mut() {
+                mapping.remove(model_name);
+            }
+        },
+        ModelType::Completion => {
+            let model_name = params.model.as_ref()
+                .ok_or_else(|| ScratchError::new(StatusCode::BAD_REQUEST, "Missing `model` query parameter".to_string()))?;
+            let models_key = "completion_models";
+
+            if file_value.get(models_key).is_none() || file_value[models_key].get(model_name).is_none() {
+                return Err(ScratchError::new(StatusCode::NOT_FOUND,
+                    format!("Completion model {} not found for provider {}", model_name, params.provider)));
+            }
+
+            if let Some(mapping) = file_value[models_key].as_mapping_mut() {
+                mapping.remove(model_name);
+            }
+        },
+        ModelType::Embedding => {
+            if file_value.get("embedding_model").is_none() {
+                return 
Err(ScratchError::new(StatusCode::NOT_FOUND, + format!("Embedding model not found for provider {}", params.provider))); + } + + file_value.as_mapping_mut().unwrap().remove("embedding_model"); + }, + } + + let file_content = serde_yaml::to_string(&file_value) + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error parsing provider file: {}", e)))?; + tokio::fs::write(&provider_path, file_content.as_bytes()).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error writing provider file: {}", e)))?; + + Ok(Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(json!({ "success": true }).to_string())) + .unwrap()) +} + +pub async fn handle_v1_model_default( + Query(params): Query, +) -> Result, ScratchError> { + let model_defaults = get_provider_model_default_settings_ui().get(¶ms.provider).ok_or_else(|| + ScratchError::new(StatusCode::NOT_FOUND, "Provider not found".to_string()) + )?; + + let response_json = match params.model_type { + ModelType::Chat => serde_json::json!(ChatModelDTO::new(model_defaults.chat.clone())), + ModelType::Completion => serde_json::json!(CompletionModelDTO::new(model_defaults.completion.clone())), + ModelType::Embedding => serde_json::json!(EmbeddingModelDTO::new(model_defaults.embedding.clone())), + }; + + Ok(Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(serde_json::to_string(&response_json).unwrap())) + .unwrap()) +} + +pub async fn handle_v1_completion_model_families() -> Response { + let response_json = json!({ + "model_families": CompletionModelFamily::all_variants() + .into_iter().map(|family| family.to_string()).collect::>() + }); + + Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(serde_json::to_string(&response_json).unwrap())) + .unwrap() +} \ No newline at end of file diff --git a/refact-agent/engine/src/http/routers/v1/subchat.rs b/refact-agent/engine/src/http/routers/v1/subchat.rs index 9e402135c..282dc82f8 100644 --- a/refact-agent/engine/src/http/routers/v1/subchat.rs +++ b/refact-agent/engine/src/http/routers/v1/subchat.rs @@ -5,10 +5,11 @@ use axum::http::{Response, StatusCode}; use hyper::Body; use serde::Deserialize; use tokio::sync::RwLock as ARwLock; +use crate::caps::resolve_chat_model; use crate::subchat::{subchat, subchat_single}; use crate::at_commands::at_commands::AtCommandsContext; use crate::custom_error::ScratchError; -use crate::global_context::GlobalContext; +use crate::global_context::{try_load_caps_quickly_if_not_present, GlobalContext}; use crate::http::routers::v1::chat::deserialize_messages_from_post; @@ -29,6 +30,7 @@ pub async fn handle_v1_subchat( let post = serde_json::from_slice::(&body_bytes) .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("JSON problem: {}", e)))?; let messages = deserialize_messages_from_post(&post.messages)?; + let caps = try_load_caps_quickly_if_not_present(global_context.clone(), 0).await?; let top_n = 7; let fake_n_ctx = 4096; @@ -43,9 +45,11 @@ pub async fn handle_v1_subchat( post.model_name.clone(), ).await)); + let model = resolve_chat_model(caps, &post.model_name) + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; let new_messages = subchat( ccx.clone(), - post.model_name.as_str(), + &model.base.id, messages, post.tools_turn_on, post.wrap_up_depth, @@ -56,11 +60,11 @@ pub async fn handle_v1_subchat( None, None, 
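The save and delete handlers above share one read-modify-write cycle over a per-provider YAML file under `providers.d`. A minimal, self-contained sketch of that upsert pattern (the `upsert_chat_model` helper, the field subset, and the sample file content are illustrative, not the engine's actual API):

```rust
use serde_yaml::{Mapping, Value};

// Insert or update one entry under `chat_models`, creating the section if needed.
// Mirrors the handler above: editing the parsed Value instead of round-tripping
// through typed structs leaves keys the user wrote by hand intact.
fn upsert_chat_model(root: &mut Value, name: &str, n_ctx: u64, enabled: bool) {
    if root.get("chat_models").is_none() {
        root.as_mapping_mut()
            .expect("provider file must be a YAML mapping")
            .insert("chat_models".into(), Value::Mapping(Mapping::new()));
    }
    let mut entry = Mapping::new();
    entry.insert("n_ctx".into(), n_ctx.into());
    entry.insert("enabled".into(), enabled.into());
    root["chat_models"][name] = Value::Mapping(entry);
}

fn main() {
    // Hypothetical pre-existing provider file content:
    let mut root: Value = serde_yaml::from_str("api_key: $OPENAI_API_KEY\n").unwrap();
    upsert_chat_model(&mut root, "gpt-4o-mini", 128000, true);
    println!("{}", serde_yaml::to_string(&root).unwrap());
    // api_key: $OPENAI_API_KEY
    // chat_models:
    //   gpt-4o-mini:
    //     n_ctx: 128000
    //     enabled: true
}
```

The same reasoning explains why the embedding branch writes a single top-level `embedding_model` mapping rather than a keyed section: a provider carries at most one embedding model, so deletion can simply remove that key.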
         None,
         None,
         None,
-        Some(false), 
+        Some(false),
     ).await.map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error: {}", e)))?;
 
     let new_messages = new_messages.into_iter()
-        .map(|msgs|msgs.iter().map(|msg|msg.into_value(&None)).collect::<Vec<Value>>())
+        .map(|msgs|msgs.iter().map(|msg|msg.into_value(&None, &model.base.id)).collect::<Vec<Value>>())
         .collect::<Vec<Vec<Value>>>();
     let resp_serialised = serde_json::to_string_pretty(&new_messages).unwrap();
     Ok(
@@ -93,6 +97,7 @@ pub async fn handle_v1_subchat_single(
     let post = serde_json::from_slice::(&body_bytes)
         .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("JSON problem: {}", e)))?;
     let messages = deserialize_messages_from_post(&post.messages)?;
+    let caps = try_load_caps_quickly_if_not_present(global_context.clone(), 0).await?;
 
     let top_n = 7;
     let fake_n_ctx = 4096;
@@ -107,9 +112,11 @@ pub async fn handle_v1_subchat_single(
         post.model_name.clone(),
     ).await));
 
+    let model = resolve_chat_model(caps, &post.model_name)
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?;
     let new_messages = subchat_single(
         ccx.clone(),
-        post.model_name.as_str(),
+        &model.base.id,
         messages,
         Some(post.tools_turn_on),
         post.tool_choice,
@@ -125,7 +132,7 @@
     ).await.map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error: {}", e)))?;
 
     let new_messages = new_messages.into_iter()
-        .map(|msgs|msgs.iter().map(|msg|msg.into_value(&None)).collect::<Vec<Value>>())
+        .map(|msgs|msgs.iter().map(|msg|msg.into_value(&None, &model.base.id)).collect::<Vec<Value>>())
         .collect::<Vec<Vec<Value>>>();
     let resp_serialised = serde_json::to_string_pretty(&new_messages).unwrap();
     Ok(
diff --git a/refact-agent/engine/src/http/routers/v1/vecdb.rs b/refact-agent/engine/src/http/routers/v1/vecdb.rs
index 9bced7ed4..efa848198 100644
--- a/refact-agent/engine/src/http/routers/v1/vecdb.rs
+++ b/refact-agent/engine/src/http/routers/v1/vecdb.rs
@@ -3,7 +3,6 @@ use axum::Extension;
 use hyper::{Body, Response, StatusCode};
 use serde::{Deserialize, Serialize};
 
-use crate::caps::get_custom_embedding_api_key;
 use crate::custom_error::ScratchError;
 use crate::global_context::SharedGlobalContext;
 use crate::vecdb::vdb_structs::VecdbSearch;
@@ -26,11 +25,9 @@ pub async fn handle_v1_vecdb_search(
         ScratchError::new(StatusCode::BAD_REQUEST, format!("JSON problem: {}", e))
     })?;
 
-    let api_key = get_custom_embedding_api_key(gcx.clone()).await?;
-    let cx_locked = gcx.read().await;
-
-    let search_res = match *cx_locked.vec_db.lock().await {
-        Some(ref db) => db.vecdb_search(post.query.to_string(), post.top_n, None, &api_key).await,
+    let vec_db = gcx.read().await.vec_db.clone();
+    let search_res = match *vec_db.lock().await {
+        Some(ref db) => db.vecdb_search(post.query.to_string(), post.top_n, None).await,
         None => {
             return Err(ScratchError::new(
                 StatusCode::INTERNAL_SERVER_ERROR, NO_VECDB.to_string(),
diff --git a/refact-agent/engine/src/integrations/config_chat.rs b/refact-agent/engine/src/integrations/config_chat.rs
index a4e8e4a8d..9951b4385 100644
--- a/refact-agent/engine/src/integrations/config_chat.rs
+++ b/refact-agent/engine/src/integrations/config_chat.rs
@@ -135,12 +135,7 @@ pub async fn mix_config_messages(
     let custom = crate::yaml_configs::customization_loader::load_customization(gcx.clone(), true, &mut error_log).await;
     // XXX: let model know there are errors
     for e in error_log.iter() {
-        tracing::error!(
-            "{}:{} {:?}",
-            crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-            e.error_line,
-            e.error_msg,
-        );
+        tracing::error!("{e}");
     }
 
     let sp: &crate::yaml_configs::customization_loader::SystemPrompt = custom.system_prompts.get("configurator").unwrap();
diff --git a/refact-agent/engine/src/integrations/docker/integr_isolation.rs b/refact-agent/engine/src/integrations/docker/integr_isolation.rs
index 62aa59ba8..547c1a86f 100644
--- a/refact-agent/engine/src/integrations/docker/integr_isolation.rs
+++ b/refact-agent/engine/src/integrations/docker/integr_isolation.rs
@@ -1,6 +1,5 @@
 use std::sync::Arc;
 use serde::{Serialize, Deserialize};
-use serde_inline_default::serde_inline_default;
 use serde_json::Value;
 use async_trait::async_trait;
 use tokio::sync::RwLock as ARwLock;
@@ -10,7 +9,6 @@ use crate::integrations::utils::{serialize_num_to_str, deserialize_str_to_num, s
 use crate::integrations::docker::docker_container_manager::Port;
 use crate::integrations::integr_abstract::{IntegrationTrait, IntegrationCommon};
 
-#[serde_inline_default]
 #[derive(Clone, Serialize, Deserialize, Default, Debug)]
 pub struct SettingsIsolation {
     pub container_workspace_folder: String,
@@ -24,12 +22,14 @@ pub struct SettingsIsolation {
     pub ports: Vec<Port>,
     #[serde(serialize_with = "serialize_num_to_str", deserialize_with = "deserialize_str_to_num")]
     pub keep_containers_alive_for_x_minutes: u64,
-    #[serde_inline_default("sh".to_string())]
+    #[serde(default = "default_docker_entrypoint")]
    pub docker_entrypoint: String,
     #[serde(default)]
     pub docker_extra_params: Vec<String>,
 }
 
+fn default_docker_entrypoint() -> String { "sh".to_string() }
+
 #[derive(Clone, Default)]
 pub struct IntegrationIsolation {
     pub common: IntegrationCommon,
diff --git a/refact-agent/engine/src/integrations/integr_cmdline.rs b/refact-agent/engine/src/integrations/integr_cmdline.rs
index 8795a6dcd..502bcda0c 100644
--- a/refact-agent/engine/src/integrations/integr_cmdline.rs
+++ b/refact-agent/engine/src/integrations/integr_cmdline.rs
@@ -21,7 +21,7 @@ use crate::call_validation::{ChatMessage, ChatContent, ContextEnum};
 use crate::postprocessing::pp_command_output::{CmdlineOutputFilter, output_mini_postprocessing};
 use crate::integrations::integr_abstract::{IntegrationTrait, IntegrationCommon, IntegrationConfirmation};
 use crate::integrations::utils::{serialize_num_to_str, deserialize_str_to_num, serialize_opt_num_to_str, deserialize_str_to_opt_num};
-use crate::integrations::setting_up_integrations::YamlError;
+use crate::custom_error::YamlError;
 
 
 #[derive(Deserialize, Serialize, Clone, Default)]
diff --git a/refact-agent/engine/src/integrations/integr_cmdline_service.rs b/refact-agent/engine/src/integrations/integr_cmdline_service.rs
index 9d2f7c057..e0939adc8 100644
--- a/refact-agent/engine/src/integrations/integr_cmdline_service.rs
+++ b/refact-agent/engine/src/integrations/integr_cmdline_service.rs
@@ -17,7 +17,7 @@ use crate::integrations::process_io_utils::{blocking_read_until_token_or_timeout
 use crate::integrations::sessions::IntegrationSession;
 use crate::integrations::integr_abstract::{IntegrationTrait, IntegrationCommon, IntegrationConfirmation};
 use crate::integrations::integr_cmdline::*;
-use crate::integrations::setting_up_integrations::YamlError;
+use crate::custom_error::YamlError;
 
 const REALLY_HORRIBLE_ROUNDTRIP: u64 = 3000;   // 3000 should be a really bad ping via internet, just in rare case it's a remote port
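The `integr_isolation.rs` hunk above is the whole recipe for dropping the `serde-inline-default` crate: a free function handed to serde's built-in `default` attribute. A tiny self-contained check of the pattern (the trimmed-down `Settings` struct stands in for `SettingsIsolation`):

```rust
use serde::Deserialize;

fn default_entrypoint() -> String { "sh".to_string() }

#[derive(Deserialize, Debug)]
struct Settings {
    // Plain serde: the function is called only when the key is absent.
    #[serde(default = "default_entrypoint")]
    docker_entrypoint: String,
    #[serde(default)] // Vec::default() == empty vec
    docker_extra_params: Vec<String>,
}

fn main() {
    let s: Settings = serde_yaml::from_str("{}").unwrap();
    assert_eq!(s.docker_entrypoint, "sh");
    assert!(s.docker_extra_params.is_empty());
    println!("{:?}", s);
}
```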
diff --git a/refact-agent/engine/src/integrations/integr_shell.rs b/refact-agent/engine/src/integrations/integr_shell.rs
index 90460a053..802a00ad7 100644
--- a/refact-agent/engine/src/integrations/integr_shell.rs
+++ b/refact-agent/engine/src/integrations/integr_shell.rs
@@ -16,7 +16,7 @@ use crate::tools::tools_description::{ToolParam, Tool, ToolDesc, MatchConfirmDen
 use crate::call_validation::{ChatMessage, ChatContent, ContextEnum};
 use crate::postprocessing::pp_command_output::CmdlineOutputFilter;
 use crate::integrations::integr_abstract::{IntegrationCommon, IntegrationTrait};
-use crate::integrations::setting_up_integrations::YamlError;
+use crate::custom_error::YamlError;
 use crate::tools::tools_execute::command_should_be_denied;
diff --git a/refact-agent/engine/src/integrations/process_io_utils.rs b/refact-agent/engine/src/integrations/process_io_utils.rs
index 71c6a6ce8..42860d49c 100644
--- a/refact-agent/engine/src/integrations/process_io_utils.rs
+++ b/refact-agent/engine/src/integrations/process_io_utils.rs
@@ -172,7 +172,7 @@ pub async fn execute_command(mut cmd: Command, timeout_secs: u64, cmd_str: &str)
     let mut child = ChildWithKillOnDrop(child);
     tokio::time::timeout(
-        tokio::time::Duration::from_secs(timeout_secs), 
+        tokio::time::Duration::from_secs(timeout_secs),
         wait_with_output(child.0.inner_mut())
     ).await
     .map_err(|_| format!("command '{cmd_str}' timed out after {timeout_secs} seconds"))?
diff --git a/refact-agent/engine/src/integrations/project_summary_chat.rs b/refact-agent/engine/src/integrations/project_summary_chat.rs
index ed2647e2c..4c24df266 100644
--- a/refact-agent/engine/src/integrations/project_summary_chat.rs
+++ b/refact-agent/engine/src/integrations/project_summary_chat.rs
@@ -18,12 +18,7 @@ pub async fn mix_project_summary_messages(
     let mut error_log = Vec::new();
     let custom = crate::yaml_configs::customization_loader::load_customization(gcx.clone(), true, &mut error_log).await;
     for e in error_log.iter() {
-        tracing::error!(
-            "{}:{} {:?}",
-            crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-            e.error_line,
-            e.error_msg,
-        );
+        tracing::error!("{e}");
     }
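The error-logging loops above (and the one in `running_integrations.rs` below) now rely on `YamlError` printing with plain `{e}`, which means the new `crate::custom_error::YamlError` must implement `Display`. That impl is not part of this patch; a plausible reconstruction, assumed from the fields the diff actually uses (`path`, `error_line`, `error_msg`):

```rust
use std::fmt;

pub struct YamlError {
    pub path: String,
    pub error_line: usize, // starts with 1, zero if invalid
    pub error_msg: String,
}

impl fmt::Display for YamlError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Renders roughly what the deleted tracing::error! calls assembled by hand,
        // e.g. "integrations.d/github.yaml:12 mapping values are not allowed here"
        write!(f, "{}:{} {}", self.path, self.error_line, self.error_msg)
    }
}
```

Centralizing the formatting in one `Display` impl collapses three hand-rolled call sites into a single `tracing::error!("{e}")`.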
diff --git a/refact-agent/engine/src/integrations/running_integrations.rs b/refact-agent/engine/src/integrations/running_integrations.rs
index ee5ac33d2..e49676075 100644
--- a/refact-agent/engine/src/integrations/running_integrations.rs
+++ b/refact-agent/engine/src/integrations/running_integrations.rs
@@ -2,6 +2,7 @@ use std::sync::Arc;
 use indexmap::IndexMap;
 use tokio::sync::RwLock as ARwLock;
 
+use crate::custom_error::YamlError;
 use crate::tools::tools_description::Tool;
 use crate::global_context::GlobalContext;
 use crate::integrations::integr_abstract::IntegrationTrait;
@@ -29,14 +30,14 @@ pub async fn load_integration_tools(
 }
 
 /// Loads and set up integrations from config files.
-/// 
-/// If `include_paths_matching` is `None`, all integrations are loaded, 
+///
+/// If `include_paths_matching` is `None`, all integrations are loaded,
 /// otherwise only those matching `include_paths_matching` glob patterns.
 pub async fn load_integrations(
     gcx: Arc<ARwLock<GlobalContext>>,
     allow_experimental: bool,
     include_paths_matching: &[String],
-) -> (IndexMap<String, Box<dyn IntegrationTrait + Send>>, Vec<crate::integrations::setting_up_integrations::YamlError>) {
+) -> (IndexMap<String, Box<dyn IntegrationTrait + Send>>, Vec<YamlError>) {
     let active_project_path = crate::files_correction::get_active_project_path(gcx.clone()).await;
     let (config_dirs, global_config_dir) = crate::integrations::setting_up_integrations::get_config_dirs(gcx.clone(), &active_project_path).await;
     let (integrations_yaml_path, is_inside_container) = {
@@ -44,7 +45,7 @@ pub async fn load_integrations(
         (gcx_locked.cmdline.integrations_yaml.clone(), gcx_locked.cmdline.inside_container)
     };
 
-    let mut error_log: Vec<crate::integrations::setting_up_integrations::YamlError> = Vec::new();
+    let mut error_log: Vec<YamlError> = Vec::new();
 
     let lst: Vec<&str> = crate::integrations::integrations_list(allow_experimental);
     let vars_for_replacements = crate::integrations::setting_up_integrations::get_vars_for_replacements(gcx.clone(), &mut error_log).await;
     let records = crate::integrations::setting_up_integrations::read_integrations_d(
@@ -73,8 +74,8 @@ pub async fn load_integrations(
         let should_be_fine = integr.integr_settings_apply(gcx.clone(), rec.integr_config_path.clone(), &rec.config_unparsed).await;
         if let Err(err) = should_be_fine {
             let error_line = err.line();
-            error_log.push(crate::integrations::setting_up_integrations::YamlError {
-                integr_config_path: rec.integr_config_path.clone(),
+            error_log.push(YamlError {
+                path: rec.integr_config_path.clone(),
                 error_line,
                 error_msg: format!("failed to apply settings: {}", err),
             });
@@ -83,12 +84,7 @@ pub async fn load_integrations(
     }
 
     for e in error_log.iter() {
-        tracing::error!(
-            "{}:{} {:?}",
-            crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-            e.error_line,
-            e.error_msg,
-        );
+        tracing::error!("{e}");
     }
 
     (integrations_map, error_log)
diff --git a/refact-agent/engine/src/integrations/setting_up_integrations.rs b/refact-agent/engine/src/integrations/setting_up_integrations.rs
index 1614e6b31..501229b83 100644
--- a/refact-agent/engine/src/integrations/setting_up_integrations.rs
+++ b/refact-agent/engine/src/integrations/setting_up_integrations.rs
@@ -8,29 +8,12 @@ use serde_json::{json, Value};
 use tokio::sync::RwLock as ARwLock;
 use tokio::fs as async_fs;
 use tokio::io::AsyncWriteExt;
+use crate::custom_error::YamlError;
 use crate::global_context::GlobalContext;
 use crate::files_correction::any_glob_matches_path;
 // use crate::tools::tools_description::Tool;
 // use crate::yaml_configs::create_configs::{integrations_enabled_cfg, read_yaml_into_value};
 
-
-#[derive(Serialize, Default)]
-pub struct YamlError {
-    pub integr_config_path: String,
-    pub error_line: usize,   // starts with 1, zero if invalid
-    pub error_msg: String,
-}
-
-impl From<(&str, &serde_yaml::Error)> for YamlError {
-    fn from((path, err): (&str, &serde_yaml::Error)) -> Self {
-        YamlError {
-            integr_config_path: path.to_string(),
-            error_line: err.location().map(|loc| loc.line()).unwrap_or(0),
-            error_msg: err.to_string(),
-        }
-    }
-}
-
 #[derive(Serialize, Default, Debug, Clone)]
 pub struct IntegrationRecord {
     pub project_path: String,
@@ -120,7 +103,7 @@ pub fn read_integrations_d(
             Err(e) => {
                 tracing::warn!("failed to read {}: {}", integrations_yaml_path, e);
                 error_log.push(YamlError {
-                    integr_config_path: integrations_yaml_path.clone(),
+                    path: integrations_yaml_path.clone(),
                     error_line: 0,
                     error_msg: e.to_string(),
                 });
@@ -213,7 +196,7 @@ pub fn read_integrations_d(
             },
             Err(e) => {
                 error_log.push(YamlError {
-                    integr_config_path: path_str.to_string(),
+                    path: path_str.to_string(),
                     error_line: 0,
                     error_msg: e.to_string(),
                 });
@@ -353,7 +336,7 @@ pub async fn get_vars_for_replacements(
             Err(e) => {
                 tracing::warn!("Failed to parse {}: {}", path.display(), e);
                 error_log.push(YamlError {
-                    integr_config_path: path.to_string_lossy().to_string(),
+                    path: path.to_string_lossy().to_string(),
                     error_line: e.location().map(|loc| loc.line()).unwrap_or(0),
                     error_msg: format!("Failed to parse {}: {}", path.display(), e),
                 });
@@ -363,7 +346,7 @@ pub async fn get_vars_for_replacements(
             Err(e) => {
                 tracing::info!("Failed to read {}: {}", path.display(), e);
                 error_log.push(YamlError {
-                    integr_config_path: path.to_string_lossy().to_string(),
+                    path: path.to_string_lossy().to_string(),
                     error_line: 0,
                     error_msg: format!("Failed to read {}: {}", path.display(), e),
                 });
@@ -516,7 +499,7 @@ pub async fn integration_config_get(
         Ok(_) => {}
         Err(err) => {
             result.error_log.push(YamlError {
-                integr_config_path: better_integr_config_path.clone(),
+                path: better_integr_config_path.clone(),
                 error_line: err.line(),
                 error_msg: err.to_string(),
             });
@@ -532,7 +515,7 @@ pub async fn integration_config_get(
         }
         Err(err) => {
             result.error_log.push(YamlError {
-                integr_config_path: better_integr_config_path.clone(),
+                path: better_integr_config_path.clone(),
                 error_line: err.location().map(|loc| loc.line()).unwrap_or(0),
                 error_msg: err.to_string(),
             });
diff --git a/refact-agent/engine/src/knowledge.rs b/refact-agent/engine/src/knowledge.rs
index 64105e573..74e8e4f7f 100644
--- a/refact-agent/engine/src/knowledge.rs
+++ b/refact-agent/engine/src/knowledge.rs
@@ -242,7 +242,7 @@ impl MemoriesDatabase {
             | rusqlite::OpenFlags::SQLITE_OPEN_URI,
         ).await.map_err(|err| format!("Failed to open database: {}", err))?;
         setup_db(&conn, pubsub_notifier.clone()).await?;
-        migrate_202501(&conn, constants.embedding_size, emb_table_name.clone(), reset_memory).await?;
+        migrate_202501(&conn, constants.embedding_model.embedding_size, emb_table_name.clone(), reset_memory).await?;
         crate::vecdb::vdb_emb_aux::cleanup_old_emb_tables(&conn, 7, 10).await?;
 
         let db = MemoriesDatabase {
@@ -504,7 +504,6 @@ pub async fn vectorize_dirty_memories(
     vecdb_handler: Arc>,
     _status: Arc>,
     client: Arc>,
-    api_key: &String,
     #[allow(non_snake_case)]
     B: usize,
 ) -> rusqlite::Result<(), String> {
@@ -525,13 +524,10 @@ pub async fn vectorize_dirty_memories(
     let my_constants: VecdbConstants = memdb.lock().await.vecdb_constants.clone();
     for chunk in to_vectorize.chunks_mut(B) {
         let texts: Vec<String> = chunk.iter().map(|x| x.window_text.clone()).collect();
-        let embedding_mb = crate::fetch_embedding::get_embedding_with_retry(
+        let embedding_mb = crate::fetch_embedding::get_embedding_with_retries(
             client.clone(),
-            &my_constants.endpoint_embeddings_style,
             &my_constants.embedding_model,
-            &my_constants.endpoint_embeddings_template,
             texts,
-            api_key,
             1,
         ).await?;
         for (chunk_save, x) in chunk.iter_mut().zip(embedding_mb.iter()) {
diff --git a/refact-agent/engine/src/known_models.json b/refact-agent/engine/src/known_models.json
new file mode 100644
index 000000000..0967c6c38
--- /dev/null
+++ b/refact-agent/engine/src/known_models.json
@@ -0,0 +1,964 @@
+{
+    "completion_models": {
+        "starcoder": {
+            "n_ctx": 4096,
+            "scratchpad_patch": {
+                "context_format": "starcoder",
+                "rag_ratio": 0.5
+            },
+            "scratchpad": "FIM-PSM",
+            "tokenizer": "hf://bigcode/starcoder2-3b",
+            "similar_models": [
+                "bigcode/starcoder",
+                "bigcode/starcoderbase",
+                "starcoder/15b/base",
+                "starcoder/15b/plus",
+                "starcoder/1b/base",
+                "starcoder/3b/base",
+                "starcoder/7b/base",
+                "wizardcoder/15b",
+                "starcoder/1b/vllm",
+                "starcoder/3b/vllm",
"starcoder/7b/vllm", + "starcoder2/3b/base", + "starcoder2/7b/base", + "starcoder2/15b/base", + "starcoder2/3b/vllm", + "starcoder2/7b/vllm", + "starcoder2/15b/vllm", + "starcoder2/3b/neuron", + "starcoder2/7b/neuron", + "starcoder2/15b/neuron", + "starcoder2/3b", + "starcoder2/7b", + "starcoder2/15b", + "bigcode/starcoder2-3b", + "bigcode/starcoder2-7b", + "bigcode/starcoder2-15b" + ] + }, + "smallcloudai/Refact-1_6B-fim": { + "n_ctx": 4096, + "tokenizer": "hf://smallcloudai/Refact-1_6B-fim", + "scratchpad": "FIM-SPM", + "similar_models": [ + "Refact/1.6B", + "Refact/1.6B/vllm" + ] + }, + "codellama/CodeLlama-13b-hf": { + "n_ctx": 4096, + "scratchpad_patch": { + "fim_prefix": "
",
+                "fim_suffix": "<SUF>",
+                "fim_middle": "<MID>",
+                "eot": "<EOT>",
+                "eos": "</s>"
+            },
+            "scratchpad": "FIM-PSM",
+            "tokenizer": "hf://codellama/CodeLlama-13b-hf",
+            "similar_models": [
+                "codellama/7b"
+            ]
+        },
+        "deepseek-coder": {
+            "n_ctx": 4096,
+            "scratchpad_patch": {
+                "fim_prefix": "<|fim▁begin|>",
+                "fim_suffix": "<|fim▁hole|>",
+                "fim_middle": "<|fim▁end|>",
+                "eot": "<|EOT|>"
+            },
+            "scratchpad": "FIM-PSM",
+            "tokenizer": "hf://deepseek-ai/deepseek-coder-1.3b-base",
+            "similar_models": [
+                "deepseek-coder/1.3b/base",
+                "deepseek-coder/5.7b/mqa-base",
+                "deepseek-coder/1.3b/vllm",
+                "deepseek-coder/5.7b/vllm",
+                "deepseek-ai/deepseek-coder-1.3b-base"
+            ]
+        },
+        "stable/3b/code": {
+            "n_ctx": 4096,
+            "scratchpad": "FIM-PSM",
+            "tokenizer": "hf://stabilityai/stable-code-3b",
+            "similar_models": []
+        },
+        "llama3/8b/instruct": {
+            "n_ctx": 8192,
+            "scratchpad_patch": {
+                "token_bos": "<|begin_of_text|>",
+                "token_esc": "<|eot_id|>",
+                "keyword_system": "<|start_header_id|>system<|end_header_id|>\n\n",
+                "keyword_user": "<|start_header_id|>user<|end_header_id|>\n\n",
+                "keyword_assistant": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+                "eot": "<|eot_id|>",
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "scratchpad": "REPLACE",
+            "tokenizer": "hf://Xenova/llama3-tokenizer",
+            "similar_models": [
+                "llama3/8b/instruct/neuron",
+                "llama3.1/8b/instruct",
+                "llama3.2/3b/instruct",
+                "llama3.2/1b/instruct"
+            ]
+        },
+        "deepseek-coder/6.7b/instruct-finetune/vllm": {
+            "n_ctx": 4096,
+            "tokenizer": "hf://deepseek-ai/deepseek-coder-6.7b-instruct",
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            }
+        },
+        "llama3/8b/instruct/vllm": {
+            "n_ctx": 8192,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/llama3-tokenizer",
+            "similar_models": [
+                "llama3.1/8b/instruct/vllm"
+            ]
+        },
+        "llama3.2/1b/instruct/vllm": {
+            "n_ctx": 16384,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://meta-llama/llama-3.2-1b-instruct",
+            "similar_models": [
+                "llama3.2/3b/instruct/vllm"
+            ]
+        },
+        "qwen2.5/coder/1.5b/instruct/vllm": {
+            "n_ctx": 32768,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Qwen/Qwen2.5-Coder-1.5B-Instruct",
+            "similar_models": [
+                "qwen2.5/coder/3b/instruct/vllm",
+                "qwen2.5/coder/7b/instruct/vllm",
+                "qwen2.5/coder/14b/instruct/vllm",
+                "qwen2.5/coder/32b/instruct/vllm",
+                "qwen2.5/7b/instruct/vllm",
+                "qwen2.5/14b/instruct/vllm",
+                "qwen2.5/32b/instruct/vllm"
+            ]
+        },
+        "gpt-4o": {
+            "n_ctx": 128000,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "gpt-4o-2024-05-13",
+                "gpt-4o-2024-08-06",
+                "gpt-4o-mini",
+                "gpt-4o-mini-2024-07-18",
+                "chatgpt-4o",
+                "openai/gpt-4o",
+                "openai/gpt-4o-2024-05-13",
+                "openai/gpt-4o-2024-08-06",
+                "openai/gpt-4o-mini",
+                "openai/gpt-4o-mini-2024-07-18",
+                "openai/chatgpt-4o"
+            ]
+        },
+        "claude-3-sonnet": {
+            "n_ctx": 200000,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/claude-tokenizer",
+            "similar_models": [
+                "claude-3-haiku",
+                "claude-3-5-haiku",
+                "claude-3-5-haiku-20241022",
+                "claude-3-opus",
+                "claude-3-5-sonnet",
+                "claude-3-5-sonnet-20241022",
+                "claude-3-7-sonnet",
+                "claude-3-7-sonnet-20250219",
+                "anthropic/claude-3-sonnet",
+                "anthropic/claude-3-haiku",
+                "anthropic/claude-3-5-haiku",
+                "anthropic/claude-3-5-haiku-20241022",
+                "anthropic/claude-3-opus",
+                "anthropic/claude-3-5-sonnet",
+                "anthropic/claude-3-5-sonnet-20241022",
+                "anthropic/claude-3-7-sonnet",
+                "anthropic/claude-3-7-sonnet-20250219"
+            ]
+        },
+        "groq-llama-3.1-8b": {
+            "n_ctx": 128000,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/Meta-Llama-3.1-Tokenizer",
+            "similar_models": [
+                "groq-llama-3.1-70b",
+                "groq-llama-3.2-1b",
+                "groq-llama-3.2-3b",
+                "groq-llama-3.2-11b-vision",
+                "groq-llama-3.2-90b-vision"
+            ]
+        },
+        "cerebras-llama3.1-8b": {
+            "n_ctx": 8192,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/Meta-Llama-3.1-Tokenizer",
+            "similar_models": [
+                "cerebras-llama3.1-70b"
+            ]
+        },
+        "gemini-2.0-flash-exp": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": false,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gemma2-tokenizer",
+            "similar_models": [
+                "gemini-1.5-flash",
+                "gemini-1.5-flash-8b"
+            ]
+        },
+        "gemini-1.5-pro": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gemma2-tokenizer",
+            "similar_models": [
+                "gemini-2.0-exp-advanced",
+                "gemini-2.5-pro"
+            ]
+        },
+        "grok-beta": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_agent": true,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/grok-1-tokenizer",
+            "similar_models": [
+                "grok-2-1212",
+                "grok-2"
+            ]
+        },
+        "grok-vision-beta": {
+            "n_ctx": 8192,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "grok-2-vision-1212": {
+            "n_ctx": 32000,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/grok-1-tokenizer",
+            "similar_models": [
+                "grok-2-vision"
+            ]
+        },
+        "deepseek-chat": {
+            "n_ctx": 64000,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://deepseek-ai/DeepSeek-V3"
+        },
+        "qwen2.5/coder/0.5b/instruct": {
+            "n_ctx": 8192,
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "<|im_start|>system\n",
+                "keyword_user": "<|im_start|>user\n",
+                "keyword_assistant": "<|im_start|>assistant\n",
+                "eot": "<|im_end|>",
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "scratchpad": "REPLACE",
+            "tokenizer": "hf://Qwen/Qwen2.5-Coder-0.5B-Instruct",
+            "similar_models": [
+                "qwen2.5/coder/1.5b/instruct",
+                "qwen2.5/coder/3b/instruct",
+                "qwen2.5/coder/7b/instruct/gptq8bit",
+                "qwen2.5/coder/7b/instruct",
+                "qwen2.5/coder/14b/instruct/gptq8bit",
+                "qwen2.5/coder/14b/instruct",
+                "qwen2.5/coder/32b/instruct/gptq8bit",
+                "qwen2.5/coder/32b/instruct"
+            ]
+        },
+        "qwen2.5-coder-base": {
+            "n_ctx": 8192,
+            "scratchpad_patch": {
+                "fim_prefix": "<|fim_prefix|>",
+                "fim_suffix": "<|fim_suffix|>",
+                "fim_middle": "<|fim_middle|>",
+                "eot": "<|endoftext|>",
+                "extra_stop_tokens": [
+                    "<|repo_name|>",
+                    "<|file_sep|>",
+                    "<|fim_pad|>",
+                    "<|cursor|>"
+                ],
+                "context_format": "qwen2.5",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Qwen/Qwen2.5-Coder-0.5B",
+            "scratchpad": "FIM-PSM",
+            "similar_models": [
+                "qwen2.5/coder/0.5b/base",
+                "qwen2.5/coder/1.5b/base",
+                "qwen2.5/coder/3b/base",
+                "qwen2.5/coder/7b/base",
+                "qwen2.5/coder/14b/base",
+                "qwen2.5/coder/32b/base",
+                "qwen2.5/coder/0.5b/base/vllm",
+                "qwen2.5/coder/1.5b/base/vllm",
+                "qwen2.5/coder/3b/base/vllm",
+                "qwen2.5/coder/7b/base/vllm",
+                "qwen2.5/coder/14b/base/vllm",
+                "qwen2.5/coder/32b/base/vllm"
+            ]
+        }
+    },
+    "chat_models": {
+        "gpt-4o": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "gpt-4o-2024-05-13",
+                "gpt-4o-2024-08-06",
+                "openai/gpt-4o",
+                "openai/gpt-4o-2024-05-13",
+                "openai/gpt-4o-2024-08-06"
+            ]
+        },
+        "gpt-4o-mini": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "scratchpad": "PASSTHROUGH",
+            "similar_models": [
+                "gpt-4o-mini-2024-07-18",
+                "openai/gpt-4o-mini",
+                "openai/gpt-4o-mini-2024-07-18"
+            ],
+            "tokenizer": "hf://Xenova/gpt-4o"
+        },
+        "o1": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_reasoning": "openai",
+            "supports_boost_reasoning": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "openai/o1"
+            ]
+        },
+        "o1-mini": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_reasoning": "openai",
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "openai/o1-mini"
+            ]
+        },
+        "o3-mini": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": false,
+            "supports_agent": true,
+            "supports_reasoning": "openai",
+            "supports_boost_reasoning": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "openai/o3-mini"
+            ]
+        },
+        "gpt-4.5-preview": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "openai/gpt-4.5-preview"
+            ]
+        },
+        "gpt-4.1": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "gpt-4.1-2025-04-14",
+                "openai/gpt-4.1",
+                "openai/gpt-4.1-2025-04-14"
+            ]
+        },
+        "gpt-4.1-mini": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "gpt-4.1-nano",
+                "gpt-4.1-mini-2025-04-14",
+                "gpt-4.1-nano-2025-04-14",
+                "openai/gpt-4.1-mini",
+                "openai/gpt-4.1-nano",
+                "openai/gpt-4.1-mini-2025-04-14",
+                "openai/gpt-4.1-nano-2025-04-14"
+            ]
+        },
+        "chatgpt-4o": {
+            "n_ctx": 128000,
+            "supports_tools": false,
+            "supports_multimodality": true,
+            "supports_agent": false,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "chatgpt-4o-latest",
+                "openai/chatgpt-4o",
+                "openai/chatgpt-4o-latest"
+            ]
+        },
+        "claude-instant-1.2": {
+            "n_ctx": 8096,
+            "scratchpad": "PASSTHROUGH",
+            "similar_models": [
+                "claude-2.1",
+                "claude-3-haiku",
+                "claude-3-opus",
+                "claude-3-sonnet",
+                "anthropic/claude-instant-1.2",
+                "anthropic/claude-2.1",
+                "anthropic/claude-3-haiku",
+                "anthropic/claude-3-opus",
+                "anthropic/claude-3-sonnet"
+            ],
+            "tokenizer": "hf://Xenova/claude-tokenizer"
+        },
+        "claude-3-5-sonnet": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/claude-tokenizer",
+            "similar_models": [
+                "claude-3-5-sonnet-20240620",
+                "claude-3-5-sonnet-latest",
+                "anthropic/claude-3-5-sonnet-20240620",
+                "anthropic/claude-3.5-sonnet",
+                "anthropic/claude-3-5-sonnet-latest"
+            ]
+        },
+        "claude-3-5-sonnet-20241022": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_clicks": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/claude-tokenizer"
+        },
+        "claude-3-5-haiku": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": false,
+            "supports_agent": false,
+            "scratchpad": "PASSTHROUGH",
+            "similar_models": [
+                "claude-3-5-haiku-20241022",
+                "claude-3-5-haiku-latest",
+                "anthropic/claude-3-5-haiku",
+                "anthropic/claude-3-5-haiku-latest"
+            ],
+            "tokenizer": "hf://Xenova/claude-tokenizer"
+        },
+        "claude-3-7-sonnet": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_clicks": true,
+            "supports_agent": true,
+            "supports_reasoning": "anthropic",
+            "supports_boost_reasoning": true,
+            "scratchpad": "PASSTHROUGH",
+            "similar_models": [
+                "claude-3-7-sonnet-20250219",
+                "claude-3-7-sonnet-latest",
+                "anthropic/claude-3.7-sonnet",
+                "anthropic/claude-3-7-sonnet",
+                "anthropic/claude-3.7-sonnet-latest",
+                "anthropic/claude-3-7-sonnet-latest"
+            ],
+            "tokenizer": "hf://Xenova/claude-tokenizer"
+        },
+        "gemini-2.5-pro": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gemma2-tokenizer",
+            "similar_models": [
+                "gemini-2.5-pro-exp-03-25",
+                "gemini-2.5-pro-preview-03-25"
+            ]
+        },
+        "gemini-2.0-flash": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gemma2-tokenizer",
+            "similar_models": [
+                "models/gemini-2.0-flash"
+            ]
+        },
+        "gemini-2.0-flash-exp": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": false,
+            "scratchpad": "PASSTHROUGH",
+            "similar_models": [
+                "gemini-1.5-flash",
+                "gemini-1.5-flash-8b"
+            ],
+            "tokenizer": "hf://Xenova/gemma2-tokenizer"
+        },
+        "gemini-2.0-flash-lite": {
+            "n_ctx": 200000,
+            "supports_tools": false,
+            "supports_multimodality": true,
+            "supports_agent": false,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gemma2-tokenizer",
+            "similar_models": [
+                "models/gemini-2.0-flash-lite"
+            ]
+        },
+        "gemini-1.5-pro": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "similar_models": [
+                "gemini-2.0-exp-advanced"
+            ],
+            "tokenizer": "hf://Xenova/gemma2-tokenizer"
+        },
+        "llama3/8b/instruct": {
+            "n_ctx": 4096,
+            "scratchpad": "CHAT-GENERIC",
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "<|start_header_id|>system<|end_header_id|>\n\n",
+                "keyword_user": "<|start_header_id|>user<|end_header_id|>\n\n",
+                "keyword_assistant": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+                "eot": "<|eot_id|>",
+                "stop_list": [
+                    "<|eot_id|>"
+                ]
+            },
+            "tokenizer": "hf://Xenova/llama3-tokenizer",
+            "similar_models": [
+                "llama3/8b/instruct/neuron",
+                "meta-llama/llama-3.1-8b-instruct",
+                "llama3.1/8b/instruct",
+                "llama3.2/3b/instruct",
+                "llama3.2/1b/instruct"
+            ]
+        },
+        "deepseek-coder/6.7b/instruct-finetune/vllm": {
+            "n_ctx": 4096,
+            "tokenizer": "hf://deepseek-ai/deepseek-coder-6.7b-instruct",
+            "scratchpad": "PASSTHROUGH"
+        },
+        "llama3/8b/instruct/vllm": {
+            "n_ctx": 8192,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://meta-llama/Meta-Llama-3-8B-Instruct",
+            "similar_models": [
+                "llama3.1/8b/instruct/vllm"
+            ]
+        },
+        "llama3.2/1b/instruct/vllm": {
+            "n_ctx": 16384,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://meta-llama/Llama-3.2-1B-Instruct",
+            "similar_models": [
+                "llama3.2/3b/instruct/vllm",
+                "llama3.3/70b/instruct/vllm"
+            ]
+        },
+        "mistral/24b/instruct/vllm": {
+            "n_ctx": 16384,
+            "supports_tools": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://mistralai/Mistral-Small-24B-Instruct-2501",
+            "similar_models": []
+        },
+        "qwen2.5/coder/1.5b/instruct/vllm": {
+            "n_ctx": 32768,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Qwen/Qwen2.5-Coder-1.5B-Instruct",
+            "similar_models": [
+                "qwen2.5/coder/3b/instruct/vllm",
+                "qwen2.5/coder/7b/instruct/vllm",
+                "qwen2.5/coder/14b/instruct/vllm",
+                "qwen2.5/coder/32b/instruct/vllm"
+            ]
+        },
+        "qwen2.5/7b/instruct/vllm": {
+            "n_ctx": 32768,
+            "supports_tools": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Qwen/Qwen2.5-7B-Instruct",
+            "similar_models": [
+                "qwen2.5/14b/instruct/vllm",
+                "qwen2.5/32b/instruct/vllm"
+            ]
+        },
+        "qwen-qwq/32b/vllm": {
+            "n_ctx": 32768,
+            "supports_tools": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Qwen/QwQ-32B",
+            "similar_models": [
+                "qwen-qwq/32b/awq/vllm"
+            ]
+        },
+        "qwen-qwq-32b": {
+            "n_ctx": 131072,
+            "supports_tools": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Qwen/QwQ-32B",
+            "similar_models": [
+                "groq/qwen-qwq-32b"
+            ]
+        },
+        "wizardlm/7b": {
+            "n_ctx": 2048,
+            "scratchpad": "CHAT-GENERIC",
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "",
+                "keyword_user": "\nUSER: ",
+                "keyword_assistant": "\nASSISTANT: ",
+                "eot": "</s>",
+                "stop_list": [
+                    "\n\n"
+                ]
+            },
+            "tokenizer": "hf://cognitivecomputations/WizardLM-7B-Uncensored",
+            "similar_models": [
+                "wizardlm/13b",
+                "wizardlm/30b"
+            ]
+        },
+        "magicoder/6.7b": {
+            "n_ctx": 4096,
+            "scratchpad": "CHAT-GENERIC",
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "",
+                "keyword_user": "\n@@ Instruction\n",
+                "keyword_assistant": "\n@@ Response\n",
+                "stop_list": [],
+                "eot": "<|EOT|>"
+            },
+            "tokenizer": "hf://ise-uiuc/Magicoder-S-DS-6.7B"
+        },
+        "mistral/7b/instruct-v0.1": {
+            "n_ctx": 4096,
+            "scratchpad": "CHAT-GENERIC",
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "",
+                "keyword_user": "[INST] ",
+                "keyword_assistant": "[/INST]\n",
+                "stop_list": [],
+                "eot": "</s>"
+            },
+            "tokenizer": "hf://mistralai/Mistral-7B-Instruct-v0.1",
+            "similar_models": [
+                "mixtral/8x7b/instruct-v0.1"
+            ]
+        },
+        "phind/34b/v2": {
+            "n_ctx": 4095,
+            "scratchpad": "CHAT-GENERIC",
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "### System Prompt\n",
+                "keyword_user": "\n### User Message\n",
+                "keyword_assistant": "\n### Assistant\n",
+                "stop_list": [],
+                "eot": "</s>"
+            },
+            "tokenizer": "hf://Phind/Phind-CodeLlama-34B-v2"
+        },
+        "deepseek-coder/6.7b/instruct": {
+            "n_ctx": 4096,
+            "scratchpad": "CHAT-GENERIC",
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "",
+                "keyword_user": "### Instruction:\n",
+                "keyword_assistant": "### Response:\n",
+                "stop_list": [],
+                "eot": "<|EOT|>"
+            },
+            "tokenizer": "hf://deepseek-ai/deepseek-coder-6.7b-instruct",
+            "similar_models": [
+                "deepseek-coder/33b/instruct",
+                "deepseek-coder/6.7b/instruct-finetune"
+            ]
+        },
+        "groq-llama-3.1-8b": {
+            "n_ctx": 128000,
+            "supports_tools": false,
+            "supports_multimodality": false,
+            "scratchpad": "PASSTHROUGH",
+            "similar_models": [
+                "groq-llama-3.1-70b",
+                "groq-llama-3.2-1b",
+                "groq-llama-3.2-3b",
+                "groq-llama-3.2-11b-vision",
+                "groq-llama-3.2-90b-vision"
+            ]
+        },
+        "cerebras-llama3.1-8b": {
+            "n_ctx": 8192,
+            "supports_tools": false,
+            "supports_multimodality": false,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/Meta-Llama-3.1-Tokenizer",
+            "similar_models": [
+                "cerebras-llama3.1-70b"
+            ]
+        },
+        "grok-3-beta": {
+            "n_ctx": 131072,
+            "supports_tools": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "grok-3-mini-beta": {
+            "n_ctx": 131072,
+            "supports_tools": true,
+            "supports_agent": true,
+            "supports_reasoning": "openai",
+            "supports_boost_reasoning": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "grok-beta": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": false,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "grok-vision-beta": {
+            "n_ctx": 8192,
+            "supports_tools": false,
+            "supports_multimodality": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "grok-2-vision-1212": {
+            "n_ctx": 32000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "grok-2-1212": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": false,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "grok-2": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": false,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "deepseek-chat": {
+            "n_ctx": 64000,
+            "supports_tools": true,
+            "supports_multimodality": false,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://deepseek-ai/DeepSeek-V3"
+        },
+        "deepseek-reasoner": {
+            "n_ctx": 64000,
+            "supports_tools": false,
+            "supports_multimodality": false,
+            "supports_reasoning": "deepseek",
+            "default_temperature": 0.6,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://deepseek-ai/DeepSeek-R1"
+        },
+        "qwen2.5/coder/0.5b/instruct": {
+            "n_ctx": 8192,
+            "supports_tools": false,
+            "supports_multimodality": false,
+            "scratchpad": "CHAT-GENERIC",
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "<|im_start|>system\n",
+                "keyword_user": "<|im_start|>user\n",
+                "keyword_assistant": "<|im_start|>assistant\n",
+                "eot": "<|im_end|>",
+                "stop_list": [
+                    "<|im_end|>"
+                ]
+            },
+            "tokenizer": "hf://Qwen/Qwen2.5-Coder-0.5B-Instruct",
+            "similar_models": [
+                "qwen2.5/coder/1.5b/instruct",
+                "qwen2.5/coder/3b/instruct",
+                "qwen2.5/coder/7b/instruct/gptq8bit",
+                "qwen2.5/coder/7b/instruct",
+                "qwen2.5/coder/14b/instruct/gptq8bit",
+                "qwen2.5/coder/14b/instruct",
+                "qwen2.5/coder/32b/instruct/gptq8bit",
+                "qwen2.5/coder/32b/instruct"
+            ]
+        },
+        "deepseek-r1-distill/1.5b/vllm": {
+            "n_ctx": 32768,
+            "supports_reasoning": "deepseek",
+            "default_temperature": 0.6,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+            "similar_models": [
+                "deepseek-r1-distill/7b/vllm",
+                "deepseek-r1-distill/8b/vllm",
+                "deepseek-r1-distill/14b/vllm",
+                "deepseek-r1-distill/32b/vllm",
+                "deepseek-r1-distill/70b/vllm"
+            ]
+        }
+    },
+    "embedding_models": {
+        "thenlper/gte-base": {
+            "n_ctx": 512,
+            "embedding_size": 768,
+            "rejection_threshold": 0.25,
+            "tokenizer": "hf://thenlper/gte-base"
+        },
+        "text-embedding-3-small": {
+            "n_ctx": 8191,
+            "embedding_size": 1536,
+            "rejection_threshold": 0.63,
+            "similar_models": [
+                "openai/text-embedding-3-small"
+            ],
+            "tokenizer": "hf://Xenova/text-embedding-ada-002"
+        },
+        "gemini-embedding-exp": {
+            "n_ctx": 8192,
+            "embedding_size": 3072,
+            "similar_models": [
+                "gemini-embedding-exp-03-07"
+            ],
+            "tokenizer": "hf://Xenova/gemma2-tokenizer"
+        },
+        "models/text-embedding-004": {
+            "n_ctx": 2048,
+            "embedding_size": 768,
+            "similar_models": [],
+            "tokenizer": "hf://Xenova/gemma2-tokenizer"
+        }
+    },
+    "comments": [
+        "gemini and gemma bear the same tokenizer",
+        "according to https://medium.com/google-cloud/a-gemini-and-gemma-tokenizer-in-java-e18831ac9677",
+        "downloadable tokenizer.json does not exist for gemini, the only precise way is to use web-requests",
+
+        "XAI WARNING: tokenizer is non-precise as there's no publicly available tokenizer for these models",
+        "XAI says that for exact same model different tokenizers could be used",
+        "therefore, using tokenizer for grok-1 which may or may not provide proximate enough results"
+    ]
+}
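With the model table now shipped as JSON, the natural consumption path for `get_known_models()` (referenced by the providers handler earlier in this patch) is to embed and parse the file once. A sketch under stated assumptions: the engine's real deserialization structs are richer than this, and `KnownModels`, the field subset, and the `OnceLock` caching below are illustrative only:

```rust
use std::collections::HashMap;
use std::sync::OnceLock;

use serde::Deserialize;

#[derive(Deserialize)]
struct KnownCompletionModel {
    n_ctx: usize,
    #[serde(default)]
    scratchpad: Option<String>,
    #[serde(default)]
    scratchpad_patch: Option<serde_json::Value>,
    #[serde(default)]
    tokenizer: Option<String>,
    // Aliases that resolve to this record, e.g. "starcoder2/3b" -> "starcoder".
    #[serde(default)]
    similar_models: Vec<String>,
}

#[derive(Deserialize)]
struct KnownModels {
    completion_models: HashMap<String, KnownCompletionModel>,
    chat_models: HashMap<String, serde_json::Value>,
    embedding_models: HashMap<String, serde_json::Value>,
}

fn get_known_models() -> &'static KnownModels {
    static CELL: OnceLock<KnownModels> = OnceLock::new();
    CELL.get_or_init(|| {
        // The JSON travels inside the binary, replacing the old raw-string constant.
        serde_json::from_str(include_str!("known_models.json"))
            .expect("known_models.json must parse")
    })
}
```

One behavioral point visible in the data itself: lookups have to consult `similar_models` as well as the top-level keys, since most deployment-specific ids (vllm, neuron, provider-prefixed names) appear only as aliases.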
diff --git a/refact-agent/engine/src/known_models.rs b/refact-agent/engine/src/known_models.rs
deleted file mode 100644
index e81e8186a..000000000
--- a/refact-agent/engine/src/known_models.rs
+++ /dev/null
@@ -1,904 +0,0 @@
-pub const KNOWN_MODELS: &str = r####"
-{
-    "code_completion_models": {
-        "bigcode/starcoder": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "FIM-PSM": {
-                    "context_format": "starcoder",
-                    "rag_ratio": 0.5
-                }
-            },
-            "default_scratchpad": "FIM-PSM",
-            "similar_models": [
-                "bigcode/starcoderbase",
-                "starcoder/15b/base",
-                "starcoder/15b/plus",
-                "starcoder/1b/base",
-                "starcoder/3b/base",
-                "starcoder/7b/base",
-                "wizardcoder/15b",
-                "starcoder/1b/vllm",
-                "starcoder/3b/vllm",
-                "starcoder/7b/vllm",
-                "starcoder2/3b/base",
-                "starcoder2/7b/base",
-                "starcoder2/15b/base",
-                "starcoder2/3b/vllm",
-                "starcoder2/7b/vllm",
-                "starcoder2/15b/vllm",
-                "starcoder2/3b/neuron",
-                "starcoder2/7b/neuron",
-                "starcoder2/15b/neuron",
-                "starcoder2/3b",
-                "starcoder2/7b",
-                "starcoder2/15b",
-                "bigcode/starcoder2-3b",
-                "bigcode/starcoder2-7b",
-                "bigcode/starcoder2-15b"
-            ]
-        },
-        "smallcloudai/Refact-1_6B-fim": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "FIM-SPM": { }
-            },
-            "default_scratchpad": "FIM-SPM",
-            "similar_models": [
-                "Refact/1.6B",
-                "Refact/1.6B/vllm"
-            ]
-        },
-        "codellama/CodeLlama-13b-hf": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "FIM-PSM": {
-                    "fim_prefix": "<PRE>",
-                    "fim_suffix": "",
-                    "fim_middle": "",
-                    "eot": "",
-                    "eos": ""
-                }
-            },
-            "default_scratchpad": "FIM-PSM",
-            "similar_models": [
-                "codellama/7b"
-            ]
-        },
-        "deepseek-coder/1.3b/base": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "FIM-PSM": {
-                    "fim_prefix": "<|fim▁begin|>",
-                    "fim_suffix": "<|fim▁hole|>",
-                    "fim_middle": "<|fim▁end|>",
-                    "eot": "<|EOT|>"
-                }
-            },
-            "default_scratchpad": "FIM-PSM",
-            "similar_models": [
-                "deepseek-coder/5.7b/mqa-base",
-                "deepseek-coder/1.3b/vllm",
-                "deepseek-coder/5.7b/vllm"
-            ]
-        },
-        "stable/3b/code": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "FIM-PSM": {}
-            },
-            "default_scratchpad": "FIM-PSM",
-            "similar_models": []
-        },
-        "llama3/8b/instruct": {
-            "n_ctx": 8192,
-            "supports_scratchpads": {
-                "REPLACE": {
-                    "token_bos": "<|begin_of_text|>",
-                    "token_esc": "<|eot_id|>",
-                    "keyword_system": "<|start_header_id|>system<|end_header_id|>\n\n",
-                    "keyword_user": "<|start_header_id|>user<|end_header_id|>\n\n",
-                    "keyword_assistant": "<|start_header_id|>assistant<|end_header_id|>\n\n",
-                    "eot": "<|eot_id|>",
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "default_scratchpad": "REPLACE",
-            "similar_models": [
-                "llama3/8b/instruct/neuron",
-                "llama3.1/8b/instruct",
-                "llama3.2/3b/instruct",
-                "llama3.2/1b/instruct"
-            ]
-        },
-        "deepseek-coder/6.7b/instruct-finetune/vllm": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            }
-        },
-        "llama3/8b/instruct/vllm": {
-            "n_ctx": 8192,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "llama3.1/8b/instruct/vllm"
-            ]
-        },
-        "llama3.2/1b/instruct/vllm": {
-            "n_ctx": 16384,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "llama3.2/3b/instruct/vllm"
-            ]
-        },
-        "qwen2.5/coder/1.5b/instruct/vllm": {
-            "n_ctx": 32768,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "qwen2.5/coder/3b/instruct/vllm",
-                "qwen2.5/coder/7b/instruct/vllm",
-                "qwen2.5/coder/14b/instruct/vllm",
-                "qwen2.5/coder/32b/instruct/vllm",
-                "qwen2.5/7b/instruct/vllm",
-                "qwen2.5/14b/instruct/vllm",
-                "qwen2.5/32b/instruct/vllm"
-            ]
-        },
-        "gpt-4o": {
-            "n_ctx": 128000,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "gpt-4o-2024-05-13",
-                "gpt-4o-2024-08-06",
-                "openai/gpt-4o",
-                "gpt-4o-mini",
-                "gpt-4o-mini-2024-07-18",
-                "chatgpt-4o"
-            ]
-        },
-        "claude-3-sonnet": {
-            "n_ctx": 200000,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "claude-3-haiku",
-                "claude-3-5-haiku",
-                "claude-3-5-haiku-20241022",
-                "claude-3-opus",
-                "claude-3-5-sonnet",
-                "claude-3-5-sonnet-20241022",
-                "claude-3-7-sonnet",
-                "claude-3-7-sonnet-20250219"
-            ]
-        },
-        "groq-llama-3.1-8b": {
-            "n_ctx": 128000,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "groq-llama-3.1-70b",
-                "groq-llama-3.2-1b",
-                "groq-llama-3.2-3b",
-                "groq-llama-3.2-11b-vision",
-                "groq-llama-3.2-90b-vision"
-            ]
-        },
-        "cerebras-llama3.1-8b": {
-            "n_ctx": 8192,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "cerebras-llama3.1-70b"
-            ]
-        },
-        "gemini-2.0-flash-exp": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_agent": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "gemini-1.5-flash",
-                "gemini-1.5-flash-8b"
-            ]
-        },
-        "gemini-1.5-pro": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "gemini-2.0-exp-advanced",
-                "gemini-2.5-pro"
-            ]
-        },
-        "grok-beta": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "grok-2-1212",
-                "grok-2"
-            ]
-        },
-        "grok-vision-beta": {
-            "n_ctx": 8192,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            }
-        },
-        "grok-2-vision-1212": {
-            "n_ctx": 32000,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "grok-2-vision"
-            ]
-        },
-        "deepseek-chat": {
-            "n_ctx": 64000,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            }
-        },
-        "qwen2.5/coder/0.5b/instruct": {
-            "n_ctx": 8192,
-            "supports_scratchpads": {
-                "REPLACE": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "<|im_start|>system\n",
-                    "keyword_user": "<|im_start|>user\n",
-                    "keyword_assistant": "<|im_start|>assistant\n",
-                    "eot": "<|im_end|>",
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "default_scratchpad": "REPLACE",
-            "similar_models": [
-                "qwen2.5/coder/1.5b/instruct",
-                "qwen2.5/coder/3b/instruct",
-                "qwen2.5/coder/7b/instruct/gptq8bit",
-                "qwen2.5/coder/7b/instruct",
-                "qwen2.5/coder/14b/instruct/gptq8bit",
-                "qwen2.5/coder/14b/instruct",
-                "qwen2.5/coder/32b/instruct/gptq8bit",
-                "qwen2.5/coder/32b/instruct"
-            ]
-        },
-        "qwen2.5/coder/0.5b/base": {
-            "n_ctx": 8192,
-            "supports_scratchpads": {
-                "FIM-PSM": {
-                    "fim_prefix": "<|fim_prefix|>",
-                    "fim_suffix": "<|fim_suffix|>",
-                    "fim_middle": "<|fim_middle|>",
-                    "eot": "<|endoftext|>",
-                    "extra_stop_tokens": ["<|repo_name|>", "<|file_sep|>", "<|fim_pad|>", "<|cursor|>"],
-                    "context_format": "qwen2.5",
-                    "rag_ratio": 0.5
-                }
-            },
-            "default_scratchpad": "FIM-PSM",
-            "similar_models": [
-                "qwen2.5/coder/1.5b/base",
-                "qwen2.5/coder/3b/base",
-                "qwen2.5/coder/7b/base",
-                "qwen2.5/coder/14b/base",
-                "qwen2.5/coder/32b/base",
-                "qwen2.5/coder/0.5b/base/vllm",
-                "qwen2.5/coder/1.5b/base/vllm",
-                "qwen2.5/coder/3b/base/vllm",
-                "qwen2.5/coder/7b/base/vllm",
-                "qwen2.5/coder/14b/base/vllm",
-                "qwen2.5/coder/32b/base/vllm"
-            ]
-        }
-    },
-    "code_chat_models": {
-        "gpt-4o": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            },
-            "similar_models": [
-                "gpt-4o-2024-05-13",
-                "gpt-4o-2024-08-06",
-                "openai/gpt-4o",
-                "chatgpt-4o"
-            ]
-        },
-        "gpt-4o-mini": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            },
-            "similar_models": [
-                "gpt-4o-mini-2024-07-18"
-            ]
-        },
-        "gpt-4.1": {
-            "n_ctx": 200000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            },
-            "similar_models": [
-                "gpt-4.1-2025-04-14"
-            ]
-        },
-        "gpt-4.1-mini": {
-            "n_ctx": 200000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            },
-            "similar_models": [
-                "gpt-4.1-nano",
-                "gpt-4.1-mini-2025-04-14",
-                "gpt-4.1-nano-2025-04-14"
-            ]
-        },
-        "chatgpt-4o": {
-            "n_ctx": 128000,
-            "supports_tools": false,
-            "supports_multimodality": true,
-            "supports_agent": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            },
-            "similar_models": []
-        },
-        "o1": {
-            "n_ctx": 200000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_reasoning": "openai",
-            "supports_boost_reasoning": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            }
-        },
-        "o1-mini": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_reasoning": "openai",
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            }
-        },
-        "o3-mini": {
-            "n_ctx": 200000,
-            "supports_tools": true,
-            "supports_multimodality": false,
-            "supports_agent": true,
-            "supports_reasoning": "openai",
-            "supports_boost_reasoning": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            }
-        },
-        "claude-instant-1.2": {
-            "n_ctx": 8096,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "claude-2.1",
-                "claude-3-haiku",
-                "claude-3-opus",
-                "claude-3-sonnet"
-            ]
-        },
-        "claude-3-5-sonnet": {
-            "n_ctx": 16384,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "claude-3-5-sonnet-20240620"
-            ]
-        },
-        "claude-3-5-sonnet-20241022": {
-            "n_ctx": 16384,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_clicks": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "claude-3-5-haiku": {
-            "n_ctx": 16384,
-            "supports_tools": true,
-            "supports_multimodality": false,
-            "supports_agent": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "claude-3-5-haiku-20241022"
-            ]
-        },
-        "claude-3-7-sonnet": {
-            "n_ctx": 16384,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_clicks": true,
-            "supports_agent": true,
-            "supports_reasoning": "anthropic",
-            "supports_boost_reasoning": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "claude-3-7-sonnet-20250219"
-            ]
-        },
-        "gemini-2.0-flash-exp": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_agent": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "gemini-1.5-flash",
-                "gemini-1.5-flash-8b"
-            ]
-        },
-        "gemini-1.5-pro": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "gemini-2.0-exp-advanced",
-                "gemini-2.5-pro"
-            ]
-        },
-        "llama3/8b/instruct": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "CHAT-GENERIC": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "<|start_header_id|>system<|end_header_id|>\n\n",
-                    "keyword_user": "<|start_header_id|>user<|end_header_id|>\n\n",
-                    "keyword_assistant": "<|start_header_id|>assistant<|end_header_id|>\n\n",
-                    "eot": "<|eot_id|>",
-                    "stop_list": [
-                        "<|eot_id|>"
-                    ]
-                }
-            },
-            "similar_models": [
-                "llama3/8b/instruct/neuron",
-                "meta-llama/llama-3.1-8b-instruct",
-                "llama3.1/8b/instruct",
-                "llama3.2/3b/instruct",
-                "llama3.2/1b/instruct"
-            ]
-        },
-        "deepseek-coder/6.7b/instruct-finetune/vllm": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "llama3/8b/instruct/vllm": {
-            "n_ctx": 8192,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "llama3.1/8b/instruct/vllm"
-            ]
-        },
-        "llama3.2/1b/instruct/vllm": {
-            "n_ctx": 16384,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "llama3.2/3b/instruct/vllm",
-                "llama3.3/70b/instruct/vllm"
-            ]
-        },
-        "mistral/24b/instruct/vllm": {
-            "n_ctx": 16384,
-            "supports_tools": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-            ]
-        },
-        "qwen2.5/coder/1.5b/instruct/vllm": {
-            "n_ctx": 32768,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "qwen2.5/coder/3b/instruct/vllm",
-                "qwen2.5/coder/7b/instruct/vllm",
-                "qwen2.5/coder/14b/instruct/vllm",
-                "qwen2.5/coder/32b/instruct/vllm"
-            ]
-        },
-        "qwen2.5/7b/instruct/vllm": {
-            "n_ctx": 32768,
-            "supports_tools": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "qwen2.5/14b/instruct/vllm",
-                "qwen2.5/32b/instruct/vllm"
-            ]
-        },
-        "qwen-qwq/32b/vllm": {
-            "n_ctx": 32768,
-            "supports_tools": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "qwen-qwq/32b/awq/vllm"
-            ]
-        },
-        "wizardlm/7b": {
-            "n_ctx": 2048,
-            "supports_scratchpads": {
-                "CHAT-GENERIC": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "",
-                    "keyword_user": "\nUSER: ",
-                    "keyword_assistant": "\nASSISTANT: ",
-                    "eot": "",
-                    "stop_list": ["\n\n"]
-                }
-            },
-            "similar_models": [
-                "wizardlm/13b",
-                "wizardlm/30b"
-            ]
-        },
-        "magicoder/6.7b": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "CHAT-GENERIC": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "",
-                    "keyword_user": "\n@@ Instruction\n",
-                    "keyword_assistant": "\n@@ Response\n",
-                    "stop_list": [],
-                    "eot": "<|EOT|>"
-                }
-            }
-        },
-        "mistral/7b/instruct-v0.1": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "CHAT-GENERIC": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "",
-                    "keyword_user": "[INST] ",
-                    "keyword_assistant": "[/INST]\n",
-                    "stop_list": [],
-                    "eot": ""
-                }
-            },
-            "similar_models": [
-                "mixtral/8x7b/instruct-v0.1"
-            ]
-        },
-        "phind/34b/v2": {
-            "n_ctx": 4095,
-            "supports_scratchpads": {
-                "CHAT-GENERIC": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "### System Prompt\n",
-                    "keyword_user": "\n### User Message\n",
-                    "keyword_assistant": "\n### Assistant\n",
-                    "stop_list": [],
-                    "eot": ""
-                }
-            }
-        },
-        "deepseek-coder/6.7b/instruct": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "CHAT-GENERIC": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "",
-                    "keyword_user": "### Instruction:\n",
-                    "keyword_assistant": "### Response:\n",
-                    "stop_list": [],
-                    "eot": "<|EOT|>"
-                }
-            },
-            "similar_models": [
-                "deepseek-coder/33b/instruct",
-                "deepseek-coder/6.7b/instruct-finetune"
-            ]
-        },
-        "groq-llama-3.1-8b": {
-            "n_ctx": 128000,
-            "supports_tools": false,
-            "supports_multimodality": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "groq-llama-3.1-70b",
-                "groq-llama-3.2-1b",
-                "groq-llama-3.2-3b",
-                "groq-llama-3.2-11b-vision",
-                "groq-llama-3.2-90b-vision"
-            ]
-        },
-        "cerebras-llama3.1-8b": {
-            "n_ctx": 8192,
-            "supports_tools": false,
-            "supports_multimodality": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "cerebras-llama3.1-70b"
-            ]
-        },
-        "grok-beta": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "grok-vision-beta": {
-            "n_ctx": 8192,
-            "supports_tools": false,
-            "supports_multimodality": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "grok-2-vision-1212": {
-            "n_ctx": 32000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "grok-2-1212": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "grok-2": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "deepseek-chat": {
-            "n_ctx": 64000,
-            "supports_tools": true,
-            "supports_multimodality": false,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "deepseek-reasoner": {
-            "n_ctx": 64000,
-            "supports_tools": false,
-            "supports_multimodality": false,
-            "supports_reasoning": "deepseek",
-            "default_temperature": 0.6,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "qwen2.5/coder/0.5b/instruct": {
-            "n_ctx": 8192,
-            "supports_tools": false,
-            "supports_multimodality": false,
-            "supports_scratchpads": {
-                "CHAT-GENERIC": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "<|im_start|>system\n",
-                    "keyword_user": "<|im_start|>user\n",
-                    "keyword_assistant": "<|im_start|>assistant\n",
-                    "eot": "<|im_end|>",
-                    "stop_list": [
-                        "<|im_end|>"
-                    ]
-                }
-            },
-            "similar_models": [
-                "qwen2.5/coder/1.5b/instruct",
-                "qwen2.5/coder/3b/instruct",
-                "qwen2.5/coder/7b/instruct/gptq8bit",
-                "qwen2.5/coder/7b/instruct",
-                "qwen2.5/coder/14b/instruct/gptq8bit",
-                "qwen2.5/coder/14b/instruct",
-                "qwen2.5/coder/32b/instruct/gptq8bit",
-                "qwen2.5/coder/32b/instruct"
-            ]
-        },
-        "deepseek-r1-distill/1.5b/vllm": {
-            "n_ctx": 32768,
-            "supports_reasoning": "deepseek",
-            "default_temperature": 0.6,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "deepseek-r1-distill/7b/vllm",
-                "deepseek-r1-distill/8b/vllm",
-                "deepseek-r1-distill/14b/vllm",
-                "deepseek-r1-distill/32b/vllm",
-                "deepseek-r1-distill/70b/vllm"
-            ]
-        }
-    },
-    "tokenizer_rewrite_path": {
-        "Refact/1.6B": "smallcloudai/Refact-1_6B-fim",
-        "starcoder2/3b": "bigcode/starcoder2-3b",
-
-        "text-embedding-3-small": "Xenova/text-embedding-ada-002",
-        "gpt-4o":                 "Xenova/gpt-4o",
-        "gpt-4o-2024-05-13":      "Xenova/gpt-4o",
-        "gpt-4o-2024-08-06":      "Xenova/gpt-4o",
-        "gpt-4o-mini":            "Xenova/gpt-4o",
-        "gpt-4o-mini-2024-07-18": "Xenova/gpt-4o",
-        "o1":                     "Xenova/gpt-4o",
-        "o1-mini":                "Xenova/gpt-4o",
-        "o3-mini":                "Xenova/gpt-4o",
-        "openai/gpt-4o":          "Xenova/gpt-4o",
-        "chatgpt-4o":             "Xenova/gpt-4o",
-
-        "gpt-4.1":                    "Xenova/gpt-4o",
-        "gpt-4.1-2025-04-14":         "Xenova/gpt-4o",
-        "gpt-4.1-mini":               "Xenova/gpt-4o",
-        "gpt-4.1-mini-2025-04-14":    "Xenova/gpt-4o",
-        "gpt-4.1-nano":               "Xenova/gpt-4o",
-        "gpt-4.1-nano-2025-04-14":    "Xenova/gpt-4o",
-
-        "claude-3-5-sonnet":          "Xenova/claude-tokenizer",
-        "claude-3-5-haiku":           "Xenova/claude-tokenizer",
-        "claude-3-5-haiku-20241022":  "Xenova/claude-tokenizer",
-        "claude-3-5-sonnet-20240620": "Xenova/claude-tokenizer",
-        "claude-3-5-sonnet-20241022": "Xenova/claude-tokenizer",
-        "claude-3-7-sonnet":          "Xenova/claude-tokenizer",
-
-        "groq-llama-3.1-8b":      "Xenova/Meta-Llama-3.1-Tokenizer",
-        "cerebras-llama3.1-8b":     "Xenova/Meta-Llama-3.1-Tokenizer",
-
-        "grok-beta": "Xenova/grok-1-tokenizer",
-        "grok-vision-beta": "Xenova/grok-1-tokenizer",
-        "grok-2": "Xenova/grok-1-tokenizer",
-        "grok-2-vision-1212": "Xenova/grok-1-tokenizer",
-        "grok-2-1212": "Xenova/grok-1-tokenizer",
-
-        "gemini-2.0-flash-exp": "Xenova/gemma2-tokenizer",
-        "gemini-1.5-flash": "Xenova/gemma2-tokenizer",
-        "gemini-1.5-flash-8b": "Xenova/gemma2-tokenizer",
-        "gemini-1.5-pro": "Xenova/gemma2-tokenizer",
-        "gemini-2.0-exp-advanced": "Xenova/gemma2-tokenizer",
-        "gemini-2.5-pro": "Xenova/gemma2-tokenizer",
-
-        "deepseek-chat":     "deepseek-ai/DeepSeek-V3",
-        "deepseek-reasoner": "deepseek-ai/DeepSeek-R1"
-    }
-}
-"####;
-
-// gemini and gemma bear the same tokenizer
-// according to https://medium.com/google-cloud/a-gemini-and-gemma-tokenizer-in-java-e18831ac9677
-// downloadable tokenizer.json does not exist for gemini, the only precise way is to use web-requests
-
-
-// XAI WARNING: tokenizer is non-precise as there's no publicly available tokenizer for these models
-// XAI says that for exact same model different tokenizers could be used
-// therefore, using tokenizer for grok-1 which may or may not provide proximate enough results
diff --git a/refact-agent/engine/src/lsp.rs b/refact-agent/engine/src/lsp.rs
index 9144fbfde..1035bbec1 100644
--- a/refact-agent/engine/src/lsp.rs
+++ b/refact-agent/engine/src/lsp.rs
@@ -143,7 +143,6 @@ impl LspBackend {
                 ..Default::default()
             },
             model: "".to_string(),
-            scratchpad: "".to_string(),
             stream: false,
             no_cache: false,
             use_ast: false,
diff --git a/refact-agent/engine/src/main.rs b/refact-agent/engine/src/main.rs
index 89335b2a1..27f628fb5 100644
--- a/refact-agent/engine/src/main.rs
+++ b/refact-agent/engine/src/main.rs
@@ -48,8 +48,7 @@ mod at_commands;
 mod tools;
 mod postprocessing;
 mod completion_cache;
-mod cached_tokenizers;
-mod known_models;
+mod tokens;
 mod scratchpad_abstract;
 mod scratchpads;
 
@@ -152,12 +151,7 @@ async fn main() {
         let mut error_log = Vec::new();
         let cust = load_customization(gcx.clone(), false, &mut error_log).await;
         for e in error_log.iter() {
-            eprintln!(
-                "{}:{} {:?}",
-                crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-                e.error_line,
-                e.error_msg,
-            );
+            eprintln!("{e}");
         }
         println!("{}", serde_json::to_string_pretty(&cust).unwrap());
         std::process::exit(0);
diff --git a/refact-agent/engine/src/postprocessing/pp_context_files.rs b/refact-agent/engine/src/postprocessing/pp_context_files.rs
index a8a15460c..744c55561 100644
--- a/refact-agent/engine/src/postprocessing/pp_context_files.rs
+++ b/refact-agent/engine/src/postprocessing/pp_context_files.rs
@@ -1,5 +1,4 @@
 use std::sync::Arc;
-use std::sync::RwLock;
 use std::collections::HashSet;
 use tracing::{info, warn};
 use tokenizers::Tokenizer;
@@ -12,7 +11,7 @@ use crate::ast::ast_structs::AstDefinition;
 use crate::global_context::GlobalContext;
 use crate::nicer_logs::{first_n_chars, last_n_chars};
 use crate::postprocessing::pp_utils::{color_with_gradient_type, colorize_comments_up, colorize_if_more_useful, colorize_minus_one, colorize_parentof, downgrade_lines_if_subsymbol, pp_ast_markup_files};
-use crate::scratchpads::scratchpad_utils::count_tokens;
+use crate::tokens::count_text_tokens_with_fallback;
 
 
 pub const RESERVE_FOR_QUESTION_AND_FOLLOWUP: usize = 1024;  // tokens
@@ -236,7 +235,7 @@ pub async fn pp_color_lines(
 
 async fn pp_limit_and_merge(
     lines_in_files: &mut IndexMap<PathBuf, Vec<FileLine>>,
-    tokenizer: Arc<RwLock<Tokenizer>>,
+    tokenizer: Option<Arc<Tokenizer>>,
     tokens_limit: usize,
     single_file_mode: bool,
     settings: &PostprocessSettings,
@@ -259,7 +258,7 @@ async fn pp_limit_and_merge(
         if !line_ref.take_ignoring_floor && line_ref.useful <= settings.take_floor {
             continue;
         }
-        let mut ntokens = count_tokens(&tokenizer.read().unwrap(), &line_ref.line_content);
+        let mut ntokens = count_text_tokens_with_fallback(tokenizer.clone(), &line_ref.line_content);
 
         if !files_mentioned_set.contains(&line_ref.file_ref.cpath) {
             if files_mentioned_set.len() >= settings.max_files_n {
@@ -268,7 +267,7 @@ async fn pp_limit_and_merge(
             files_mentioned_set.insert(line_ref.file_ref.cpath.clone());
             files_mentioned_sequence.push(line_ref.file_ref.cpath.clone());
             if !single_file_mode {
-                ntokens += count_tokens(&tokenizer.read().unwrap(), &line_ref.file_ref.cpath.as_str());
+                ntokens += count_text_tokens_with_fallback(tokenizer.clone(), &line_ref.file_ref.cpath.as_str());
                 ntokens += 5;  // a margin for any overhead: file_sep, new line, etc
             }
         }
@@ -350,7 +349,7 @@ async fn pp_limit_and_merge(
 pub async fn postprocess_context_files(
     gcx: Arc<ARwLock<GlobalContext>>,
     context_file_vec: &mut Vec<ContextFile>,
-    tokenizer: Arc<RwLock<Tokenizer>>,
+    tokenizer: Option<Arc<Tokenizer>>,
     tokens_limit: usize,
     single_file_mode: bool,
     settings: &PostprocessSettings,
diff --git a/refact-agent/engine/src/postprocessing/pp_plain_text.rs b/refact-agent/engine/src/postprocessing/pp_plain_text.rs
index e4cbc6b9a..d4b258244 100644
--- a/refact-agent/engine/src/postprocessing/pp_plain_text.rs
+++ b/refact-agent/engine/src/postprocessing/pp_plain_text.rs
@@ -1,21 +1,20 @@
-use std::sync::{Arc, RwLockReadGuard};
-use std::sync::RwLock;
+use std::sync::Arc;
 use tokenizers::Tokenizer;
 
 use crate::call_validation::{ChatContent, ChatMessage};
 use crate::scratchpads::multimodality::MultimodalElement;
-use crate::scratchpads::scratchpad_utils::count_tokens;
+use crate::tokens::count_text_tokens_with_fallback;
 
 
 fn limit_text_content(
-    tokenizer_guard: &RwLockReadGuard<Tokenizer>,
+    tokenizer: Option<Arc<Tokenizer>>,
     text: &String,
     tok_used: &mut usize,
     tok_per_m: usize,
 ) -> String {
     let mut new_text_lines = vec![];
     for line in text.lines() {
-        let line_tokens = count_tokens(tokenizer_guard, &line);
+        let line_tokens = count_text_tokens_with_fallback(tokenizer.clone(), &line);
         if tok_used.clone() + line_tokens > tok_per_m {
             if new_text_lines.is_empty() {
                 new_text_lines.push("No content: tokens limit reached");
@@ -31,7 +30,7 @@ fn limit_text_content(
 
 pub async fn postprocess_plain_text(
     plain_text_messages: Vec<&ChatMessage>,
-    tokenizer: Arc<RwLock<Tokenizer>>,
+    tokenizer: Option<Arc<Tokenizer>>,
     tokens_limit: usize,
     style: &Option<String>,
 ) -> (Vec<ChatMessage>, usize) {
@@ -45,14 +44,13 @@ pub async fn postprocess_plain_text(
     let mut tok_per_m = tokens_limit / messages_sorted.len();
     let mut new_messages = vec![];
 
-    let tokenizer_guard = tokenizer.read().unwrap();
     for (idx, msg) in messages_sorted.iter().cloned().enumerate() {
         let mut tok_used = 0;
         let mut m_cloned = msg.clone();
         
         m_cloned.content = match &msg.content {
             ChatContent::SimpleText(text) => {
-                let new_content = limit_text_content(&tokenizer_guard, text, &mut tok_used, tok_per_m);
+                let new_content = limit_text_content(tokenizer.clone(), text, &mut tok_used, tok_per_m);
                 ChatContent::SimpleText(new_content)
             },
             ChatContent::Multimodal(elements) => {
@@ -61,7 +59,7 @@ pub async fn postprocess_plain_text(
                 for element in elements {
                     if element.is_text() {
                         let mut el_cloned = element.clone();
-                        el_cloned.m_content = limit_text_content(&tokenizer_guard, &el_cloned.m_content, &mut tok_used, tok_per_m);
+                        el_cloned.m_content = limit_text_content(tokenizer.clone(), &el_cloned.m_content, &mut tok_used, tok_per_m);
                         new_content.push(el_cloned)
                     } else if element.is_image() {
                         let tokens = element.count_tokens(None, style).unwrap() as usize;
diff --git a/refact-agent/engine/src/restream.rs b/refact-agent/engine/src/restream.rs
index 60bfded17..ce0bf3bc2 100644
--- a/refact-agent/engine/src/restream.rs
+++ b/refact-agent/engine/src/restream.rs
@@ -1,6 +1,5 @@
-use std::sync::{Arc, RwLock as StdRwLock};
+use std::sync::Arc;
 use tokio::sync::Mutex as AMutex;
-use tokio::sync::RwLock as ARwLock;
 use tokio::sync::mpsc;
 use async_stream::stream;
 use futures::StreamExt;
@@ -9,124 +8,57 @@ use reqwest_eventsource::Event;
 use reqwest_eventsource::Error as REError;
 use serde_json::{json, Value};
 use tracing::info;
+use uuid;
 
 use crate::call_validation::{ChatMeta, SamplingParameters};
+use crate::caps::BaseModelRecord;
 use crate::custom_error::ScratchError;
 use crate::nicer_logs;
 use crate::scratchpad_abstract::{FinishReason, ScratchpadAbstract};
 use crate::telemetry::telemetry_structs;
 use crate::at_commands::at_commands::AtCommandsContext;
-use crate::caps::get_api_key;
-
-
-async fn _get_endpoint_and_stuff_from_model_name(
-    gcx: Arc<ARwLock<GlobalContext>>,
-    caps: Arc<StdRwLock<CodeAssistantCaps>>,
-    model_name: String,
-) -> (String, String, String, String)
-{
-    let (
-        custom_apikey,
-        mut endpoint_style,
-        custom_endpoint_style,
-        mut endpoint_template,
-        custom_endpoint_template,
-        endpoint_chat_passthrough,
-    ) = {
-        let caps_locked = caps.read().unwrap();
-        if caps_locked.code_chat_models.contains_key(&model_name) {
-            (
-                caps_locked.chat_apikey.clone(),
-                caps_locked.endpoint_style.clone(),      // abstract
-                caps_locked.chat_endpoint_style.clone(), // chat-specific
-                caps_locked.endpoint_template.clone(),   // abstract
-                caps_locked.chat_endpoint.clone(),       // chat-specific
-                caps_locked.endpoint_chat_passthrough.clone(),
-            )
-        } else {
-            (
-                caps_locked.completion_apikey.clone(),
-                caps_locked.endpoint_style.clone(),             // abstract
-                caps_locked.completion_endpoint_style.clone(),  // completion-specific
-                caps_locked.endpoint_template.clone(),          // abstract
-                caps_locked.completion_endpoint.clone(),        // completion-specific
-                "".to_string(),
-            )
-        }
-    };
-    let api_key = get_api_key(gcx, custom_apikey).await;
-    if !custom_endpoint_style.is_empty() {
-        endpoint_style = custom_endpoint_style;
-    }
-    if !custom_endpoint_template.is_empty() {
-        endpoint_template = custom_endpoint_template;
-    }
-    (
-        api_key,
-        endpoint_template,
-        endpoint_style,
-        endpoint_chat_passthrough,
-    )
-}
+
 
 pub async fn scratchpad_interaction_not_stream_json(
     ccx: Arc<AMutex<AtCommandsContext>>,
     scratchpad: &mut Box<dyn ScratchpadAbstract>,
     scope: String,
     prompt: &str,
-    model_name: String,
+    model_rec: &BaseModelRecord,
     parameters: &SamplingParameters,  // includes n
     only_deterministic_messages: bool,
     meta: Option<ChatMeta>
 ) -> Result<Value, ScratchError> {
     let t2 = std::time::SystemTime::now();
     let gcx = ccx.lock().await.global_context.clone();
-    let (client, caps, tele_storage, slowdown_arc) = {
+    let (client, tele_storage, slowdown_arc) = {
         let gcx_locked = gcx.write().await;
-        let caps = gcx_locked.caps.clone()
-            .ok_or(ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, "No caps available".to_string()))?;
         (
             gcx_locked.http_client.clone(),
-            caps,
             gcx_locked.telemetry.clone(),
             gcx_locked.http_client_slowdown.clone()
         )
     };
-    let (
-        bearer,
-        endpoint_template,
-        endpoint_style,
-        endpoint_chat_passthrough,
-    ) = _get_endpoint_and_stuff_from_model_name(gcx.clone(), caps.clone(), model_name.clone()).await;
 
     let mut save_url: String = String::new();
     let _ = slowdown_arc.acquire().await;
-    let metadata_supported = crate::global_context::is_metadata_supported(gcx.clone()).await;
     let mut model_says = if only_deterministic_messages {
         save_url = "only-det-messages".to_string();
         Ok(Value::Object(serde_json::Map::new()))
-    } else if endpoint_style == "hf" {
+    } else if model_rec.endpoint_style == "hf" {
         crate::forward_to_hf_endpoint::forward_to_hf_style_endpoint(
-            &mut save_url,
-            bearer.clone(),
-            &model_name,
-            &prompt,
+            &model_rec,
+            prompt,
             &client,
-            &endpoint_template,
             &parameters,
             meta
         ).await
     } else {
         crate::forward_to_openai_endpoint::forward_to_openai_style_endpoint(
-            &mut save_url,
-            bearer.clone(),
-            &model_name,
-            &prompt,
+            &model_rec,
+            prompt,
             &client,
-            &endpoint_template,
-            &endpoint_chat_passthrough,
-            &parameters,  // includes n
-            metadata_supported,
+            &parameters,
             meta
         ).await
     }.map_err(|e| {
@@ -138,6 +70,8 @@ pub async fn scratchpad_interaction_not_stream_json(
             ));
         ScratchError::new_but_skip_telemetry(StatusCode::INTERNAL_SERVER_ERROR, format!("forward_to_endpoint: {}", e))
     })?;
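+    // Some providers (e.g. Gemini) return tool calls without "id" or "index"; fill them in before the response is used downstream.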
+    generate_id_and_index_for_tool_calls_if_missing(&mut model_says);
+    
     tele_storage.write().unwrap().tele_net.push(telemetry_structs::TelemetryNetwork::new(
         save_url.clone(),
         scope.clone(),
@@ -253,7 +187,7 @@ pub async fn scratchpad_interaction_not_stream(
     ccx: Arc<AMutex<AtCommandsContext>>,
     scratchpad: &mut Box<dyn ScratchpadAbstract>,
     scope: String,
-    model_name: String,
+    model_rec: &BaseModelRecord,
     parameters: &mut SamplingParameters,
     only_deterministic_messages: bool,
     meta: Option<ChatMeta>
@@ -273,7 +207,7 @@ pub async fn scratchpad_interaction_not_stream(
         scratchpad,
         scope,
         prompt.as_str(),
-        model_name,
+        &model_rec,
         parameters,
         only_deterministic_messages,
         meta
@@ -296,34 +230,26 @@ pub async fn scratchpad_interaction_stream(
     ccx: Arc<AMutex<AtCommandsContext>>,
     mut scratchpad: Box<dyn ScratchpadAbstract>,
     scope: String,
-    mut model_name: String,
+    mut model_rec: BaseModelRecord,
     parameters: SamplingParameters,
     only_deterministic_messages: bool,
     meta: Option<ChatMeta>
 ) -> Result<Response<Body>, ScratchError> {
-    let t1 = std::time::SystemTime::now();
+    let t1: std::time::SystemTime = std::time::SystemTime::now();
     let evstream = stream! {
         let my_scratchpad: &mut Box<dyn ScratchpadAbstract> = &mut scratchpad;
         let mut my_parameters = parameters.clone();
         let my_ccx = ccx.clone();
 
         let gcx = ccx.lock().await.global_context.clone();
-        let (client, caps, tele_storage, slowdown_arc) = {
+        let (client, tele_storage, slowdown_arc) = {
             let gcx_locked = gcx.write().await;
-            let caps = gcx_locked.caps.clone().unwrap();
             (
                 gcx_locked.http_client.clone(),
-                caps,
                 gcx_locked.telemetry.clone(),
                 gcx_locked.http_client_slowdown.clone()
             )
         };
-        let (
-            bearer,
-            endpoint_template,
-            endpoint_style,
-            endpoint_chat_passthrough,
-        ) = _get_endpoint_and_stuff_from_model_name(gcx.clone(), caps.clone(), model_name.clone()).await;
 
         let t0 = std::time::Instant::now();
         let mut prompt = String::new();
@@ -376,7 +302,6 @@ pub async fn scratchpad_interaction_stream(
         }
         info!("scratchpad_interaction_stream prompt {:?}", t0.elapsed());
 
-        let mut save_url: String = String::new();
         let _ = slowdown_arc.acquire().await;
         loop {
             let value_maybe = my_scratchpad.response_spontaneous();
@@ -398,29 +323,20 @@ pub async fn scratchpad_interaction_stream(
                 break;
             }
             // info!("prompt: {:?}", prompt);
-            let metadata_supported = crate::global_context::is_metadata_supported(gcx.clone()).await;
-            let event_source_maybe = if endpoint_style == "hf" {
+            let event_source_maybe = if model_rec.endpoint_style == "hf" {
                 crate::forward_to_hf_endpoint::forward_to_hf_style_endpoint_streaming(
-                    &mut save_url,
-                    bearer.clone(),
-                    &model_name,
-                    prompt.as_str(),
+                    &model_rec,
+                    &prompt,
                     &client,
-                    &endpoint_template,
                     &my_parameters,
                     meta
                 ).await
             } else {
                 crate::forward_to_openai_endpoint::forward_to_openai_style_endpoint_streaming(
-                    &mut save_url,
-                    bearer.clone(),
-                    &model_name,
-                    prompt.as_str(),
+                    &model_rec,
+                    &prompt,
                     &client,
-                    &endpoint_template,
-                    &endpoint_chat_passthrough,
                     &my_parameters,
-                    metadata_supported,
                     meta
                 ).await
             };
@@ -429,15 +345,15 @@ pub async fn scratchpad_interaction_stream(
                 Err(e) => {
                     let e_str = format!("forward_to_endpoint: {:?}", e);
                     tele_storage.write().unwrap().tele_net.push(telemetry_structs::TelemetryNetwork::new(
-                        save_url.clone(),
+                        model_rec.endpoint.clone(),
                         scope.clone(),
                         false,
                         e_str.to_string(),
                     ));
                     tracing::error!(e_str);
-                    let value_str = serde_json::to_string(&json!({"detail": e_str})).unwrap();
+                    let value_str = format!("data: {}\n\n", serde_json::to_string(&json!({"detail": e_str})).unwrap());
                     yield Result::<_, String>::Ok(value_str);
-                    break;
+                    return;
                 }
             };
             let mut was_correct_output_even_if_error = false;
@@ -451,12 +367,13 @@ pub async fn scratchpad_interaction_stream(
                         if message.data.starts_with("[DONE]") {
                             break;
                         }
-                        let json = serde_json::from_str::<serde_json::Value>(&message.data).unwrap();
+                        let mut json = serde_json::from_str::<serde_json::Value>(&message.data).unwrap();
+                        generate_id_and_index_for_tool_calls_if_missing(&mut json);
                         crate::global_context::look_for_piggyback_fields(gcx.clone(), &json).await;
                         match _push_streaming_json_into_scratchpad(
                             my_scratchpad,
                             &json,
-                            &mut model_name,
+                            &mut model_rec.name,
                             &mut was_correct_output_even_if_error,
                         ) {
                             Ok((mut value, finish_reason)) => {
@@ -503,13 +420,13 @@ pub async fn scratchpad_interaction_stream(
                         tracing::error!("restream error: {}\n", problem_str);
                         {
                             tele_storage.write().unwrap().tele_net.push(telemetry_structs::TelemetryNetwork::new(
-                                save_url.clone(),
+                                model_rec.endpoint.clone(),
                                 scope.clone(),
                                 false,
                                 problem_str.clone(),
                             ));
                         }
-                        yield Result::<_, String>::Ok(serde_json::to_string(&json!({"detail": problem_str})).unwrap());
+                        yield Result::<_, String>::Ok(format!("data: {}\n\n", serde_json::to_string(&json!({"detail": problem_str})).unwrap()));
                         event_source.close();
                         return;
                     },
@@ -518,7 +435,7 @@ pub async fn scratchpad_interaction_stream(
 
             let mut value = my_scratchpad.streaming_finished(last_finish_reason)?;
             value["created"] = json!(t1.duration_since(std::time::UNIX_EPOCH).unwrap().as_secs_f64());
-            value["model"] = json!(model_name.clone());
+            value["model"] = json!(model_rec.name.clone());
             let value_str = format!("data: {}\n\n", serde_json::to_string(&value).unwrap());
             info!("yield final: {:?}", value_str);
             yield Result::<_, String>::Ok(value_str);
@@ -527,7 +444,7 @@ pub async fn scratchpad_interaction_stream(
         info!("yield: [DONE]");
         yield Result::<_, String>::Ok("data: [DONE]\n\n".to_string());
         tele_storage.write().unwrap().tele_net.push(telemetry_structs::TelemetryNetwork::new(
-            save_url.clone(),
+            model_rec.endpoint.clone(),
             scope.clone(),
             true,
             "".to_string(),
@@ -581,6 +498,44 @@ pub fn try_insert_usage(msg_value: &mut serde_json::Value) -> bool {
     return false;
 }
 
+/// Generates `id` and `index` for tool calls that are missing them; some providers (e.g. Gemini) omit these fields
+fn generate_id_and_index_for_tool_calls_if_missing(value: &mut serde_json::Value) {
+    fn process_tool_call(tool_call: &mut serde_json::Value, idx: usize) {
+        if let Some(id) = tool_call.get_mut("id") {
+            if id.is_string() && id.as_str().unwrap_or("").is_empty() {
+                let uuid = uuid::Uuid::new_v4().to_string().replace("-", "");
+                *id = json!(format!("call_{uuid}"));
+                tracing::info!("Generated UUID for empty tool call ID: call_{}", uuid);
+            }
+        }
+        if tool_call.get("index").is_none() {
+            tool_call["index"] = json!(idx);
+        }
+    }
+
+    if let Some(tool_calls) = value.get_mut("tool_calls").and_then(|tc| tc.as_array_mut()) {
+        for (i, tool_call) in tool_calls.iter_mut().enumerate() {
+            process_tool_call(tool_call, i);
+        }
+    }
+    
+    if let Some(choices) = value.get_mut("choices").and_then(|c| c.as_array_mut()) {
+        for choice in choices {
+            for field in ["delta", "message"] {
+                if let Some(tool_calls) = choice.get_mut(field)
+                    .and_then(|v| v.get_mut("tool_calls"))
+                    .and_then(|tc| tc.as_array_mut()) 
+                {
+                    for (i, tool_call) in tool_calls.iter_mut().enumerate() {
+                        process_tool_call(tool_call, i);
+                    }
+                }
+            }
+        }
+    }
+}
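+// Illustrative sketch of the normalization (hypothetical chunk, not a real provider response):
+//   {"choices":[{"delta":{"tool_calls":[{"id":"","function":{"name":"f","arguments":"{}"}}]}}]}
+// becomes
+//   {"choices":[{"delta":{"tool_calls":[{"id":"call_<uuid>","index":0,"function":{"name":"f","arguments":"{}"}}]}}]}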
+
+
 fn _push_streaming_json_into_scratchpad(
     scratch: &mut Box<dyn ScratchpadAbstract>,
     json: &serde_json::Value,
@@ -624,6 +579,8 @@ fn _push_streaming_json_into_scratchpad(
         }
         value["model"] = json!(model_name.clone());
         Ok((value, finish_reason))
+    } else if json.get("type").and_then(|t| t.as_str()) == Some("ping") {
+        Ok((serde_json::value::Value::Null, FinishReason::None))
     } else if let Some(err) = json.get("error") {
         Err(format!("{}", err))
     } else if let Some(msg) = json.get("human_readable_message") {
diff --git a/refact-agent/engine/src/scratchpad_abstract.rs b/refact-agent/engine/src/scratchpad_abstract.rs
index 0ae9ef1fc..cfa463d3c 100644
--- a/refact-agent/engine/src/scratchpad_abstract.rs
+++ b/refact-agent/engine/src/scratchpad_abstract.rs
@@ -1,6 +1,5 @@
 use serde_json;
 use std::sync::Arc;
-use std::sync::RwLock;
 use tokio::sync::Mutex as AMutex;
 use tokenizers::Tokenizer;
 use async_trait::async_trait;
@@ -8,6 +7,7 @@ use serde_json::Value;
 
 use crate::at_commands::at_commands::AtCommandsContext;
 use crate::call_validation::SamplingParameters;
+use crate::tokens::count_text_tokens;
 
 use tracing::warn;
 
@@ -120,7 +120,7 @@ pub trait ScratchpadAbstract: Send {
 // aggregate this struct to make scratchpad implementation easier
 #[derive(Debug, Clone)]
 pub struct HasTokenizerAndEot {
-    pub tokenizer: Arc<RwLock<Tokenizer>>,
+    pub tokenizer: Option<Arc<Tokenizer>>,
     pub eot: String,
     pub eos: String,
     pub context_format: String,
@@ -128,7 +128,7 @@ pub struct HasTokenizerAndEot {
 }
 
 impl HasTokenizerAndEot {
-    pub fn new(tokenizer: Arc<RwLock<Tokenizer>>) -> Self {
+    pub fn new(tokenizer: Option<Arc<Tokenizer>>) -> Self {
         HasTokenizerAndEot { tokenizer, eot: String::new(), eos: String::new(), context_format: String::new(), rag_ratio: 0.5}
     }
 
@@ -136,24 +136,23 @@ impl HasTokenizerAndEot {
         &self,
         text: &str,
     ) -> Result<i32, String> {
-        let tokenizer = self.tokenizer.write().unwrap();
-        let tokens = tokenizer.encode(text, false).map_err(|err| {
-            return format!("Encoding error: {}", err);
-        })?;
-        Ok(tokens.len() as i32)
+        count_text_tokens(self.tokenizer.clone(), text).map(|t| t as i32)
     }
 
     pub fn assert_one_token(
         &self,
         text: &str
     ) -> Result<(), String> {
-        let tokenizer = self.tokenizer.write().unwrap();
-        let tokens = tokenizer.encode(text, false).map_err(|err| {
-            format!("assert_one_token: {}", err)
-        })?;
-        if tokens.len() != 1 {
-            return Err(format!("assert_one_token: expected 1 token for \"{}\", got {}", text, tokens.len()));
+        if self.tokenizer.is_none() {
+            return Err("assert_one_token: no tokenizer".to_string());
+        }
+
+        let token_count = count_text_tokens(self.tokenizer.clone(), text)?;
+
+        if token_count != 1 {
+            Err(format!("assert_one_token: expected 1 token for \"{text}\", got {token_count}"))
+        } else {
+            Ok(())
         }
-        Ok(())
     }
 }
diff --git a/refact-agent/engine/src/scratchpads/chat_generic.rs b/refact-agent/engine/src/scratchpads/chat_generic.rs
index c0e24056a..40c941471 100644
--- a/refact-agent/engine/src/scratchpads/chat_generic.rs
+++ b/refact-agent/engine/src/scratchpads/chat_generic.rs
@@ -1,5 +1,4 @@
 use std::sync::Arc;
-use std::sync::RwLock;
 
 use async_trait::async_trait;
 use serde_json::Value;
@@ -40,7 +39,7 @@ pub struct GenericChatScratchpad {
 
 impl GenericChatScratchpad {
     pub fn new(
-        tokenizer: Arc<RwLock<Tokenizer>>,
+        tokenizer: Option<Arc<Tokenizer>>,
         post: &ChatPost,
         messages: &Vec<ChatMessage>,
         prepend_system_prompt: bool,
diff --git a/refact-agent/engine/src/scratchpads/chat_passthrough.rs b/refact-agent/engine/src/scratchpads/chat_passthrough.rs
index 0af619f3d..8fbe8ccde 100644
--- a/refact-agent/engine/src/scratchpads/chat_passthrough.rs
+++ b/refact-agent/engine/src/scratchpads/chat_passthrough.rs
@@ -1,5 +1,4 @@
 use std::sync::Arc;
-use std::sync::RwLock as StdRwLock;
 use indexmap::IndexMap;
 use serde_json::{json, Value};
 use tokenizers::Tokenizer;
@@ -10,6 +9,7 @@ use tracing::info;
 use crate::at_commands::execute_at::{run_at_commands_locally, run_at_commands_remotely};
 use crate::at_commands::at_commands::AtCommandsContext;
 use crate::call_validation::{ChatMessage, ChatPost, ReasoningEffort, SamplingParameters};
+use crate::caps::resolve_chat_model;
 use crate::http::http_get_json;
 use crate::integrations::docker::docker_container_manager::docker_container_get_host_lsp_port_to_connect;
 use crate::scratchpad_abstract::{FinishReason, HasTokenizerAndEot, ScratchpadAbstract};
@@ -68,7 +68,7 @@ pub struct ChatPassthrough {
 
 impl ChatPassthrough {
     pub fn new(
-        tokenizer: Arc<StdRwLock<Tokenizer>>,
+        tokenizer: Option<Arc<Tokenizer>>,
         post: &ChatPost,
         messages: &Vec<ChatMessage>,
         prepend_system_prompt: bool,
@@ -215,23 +215,17 @@ impl ScratchpadAbstract for ChatPassthrough {
             let gcx_locked = gcx.write().await;
             gcx_locked.caps.clone().unwrap()
         };
-        let model_record_mb = {
-            let caps_locked = caps.read().unwrap();
-            caps_locked.code_chat_models.get(&self.post.model).cloned()
-        };
+        let model_record_mb = resolve_chat_model(caps, &self.post.model).ok();
 
-        let supports_reasoning = if let Some(model_record) = model_record_mb.clone() {
-            !model_record.supports_reasoning.is_none()
-        } else {
-            false
-        };
+        let supports_reasoning = model_record_mb.as_ref()
+            .map_or(false, |m| m.supports_reasoning.is_some());
 
         let limited_adapted_msgs = if supports_reasoning {
-            let model_record = model_record_mb.unwrap();
+            let model_record = model_record_mb.clone().unwrap();
             _adapt_for_reasoning_models(
-                &limited_msgs,
+                limited_msgs,
                 sampling_parameters_to_patch,
-                model_record.supports_reasoning.unwrap(),
+                model_record.supports_reasoning.as_ref().unwrap().clone(),
                 model_record.default_temperature.clone(),
                 model_record.supports_boost_reasoning.clone(),
             )
@@ -239,7 +233,8 @@ impl ScratchpadAbstract for ChatPassthrough {
             limited_msgs
         };
 
-        let converted_messages = convert_messages_to_openai_format(limited_adapted_msgs, &style);
+        let model_id = model_record_mb.map(|m| m.base.id.clone()).unwrap_or_default();
+        let converted_messages = convert_messages_to_openai_format(limited_adapted_msgs, &style, &model_id);
         big_json["messages"] = json!(converted_messages);
         big_json["compression_strength"] = json!(compression_strength);
 
@@ -285,7 +280,7 @@ impl ScratchpadAbstract for ChatPassthrough {
 }
 
 fn _adapt_for_reasoning_models(
-    messages: &Vec<ChatMessage>,
+    messages: Vec<ChatMessage>,
     sampling_parameters: &mut SamplingParameters,
     supports_reasoning: String,
     default_temperature: Option<f32>,
@@ -299,8 +294,7 @@ fn _adapt_for_reasoning_models(
             sampling_parameters.temperature = default_temperature;
 
             // NOTE: OpenAI prefers user messages over system ones
-            messages.iter().map(|msg| {
-                let mut msg = msg.clone();
+            messages.into_iter().map(|mut msg| {
                 if msg.role == "system" {
                     msg.role = "user".to_string();
                 }
@@ -319,11 +313,11 @@ fn _adapt_for_reasoning_models(
                     "budget_tokens": budget_tokens,
                 }));
             }
-            messages.clone()
+            messages
         },
         _ => {
             sampling_parameters.temperature = default_temperature.clone();
-            messages.clone()
+            messages
         }
     }
 }
diff --git a/refact-agent/engine/src/scratchpads/chat_utils_limit_history.rs b/refact-agent/engine/src/scratchpads/chat_utils_limit_history.rs
index a03b2d268..aef5f3044 100644
--- a/refact-agent/engine/src/scratchpads/chat_utils_limit_history.rs
+++ b/refact-agent/engine/src/scratchpads/chat_utils_limit_history.rs
@@ -40,15 +40,15 @@ pub enum CompressionStrength {
 /// 
 /// # Arguments
 /// 
-/// * `model_name` - The name of the model (e.g., "claude-3-7-sonnet")
+/// * `model_id` - Provider-qualified model name (e.g., "Refact/claude-3-7-sonnet")
 /// 
 /// # Returns
 /// 
 /// A tuple containing (EXTRA_TOKENS_PER_MESSAGE, EXTRA_BUDGET_OFFSET_PERC)
-pub fn get_model_token_params(model_name: &str) -> (i32, f32) {
-    match model_name {
+pub fn get_model_token_params(model_id: &str) -> (i32, f32) {
+    match model_id {
         // Claude 3 Sonnet models need higher token overhead
-        "claude-3-7-sonnet" | "claude-3-5-sonnet" => (150, 0.2),
+        m if m.contains("claude-3-7-sonnet") || m.contains("claude-3-5-sonnet") => (150, 0.2),
         
         // Default values for all other models
         _ => (3, 0.0),
@@ -60,11 +60,11 @@ fn recalculate_token_limits(
     tools_description_tokens: i32,
     n_ctx: usize,
     max_new_tokens: usize,
-    model_name: &str,
+    model_id: &str,
 ) -> (i32, i32) {
     let occupied_tokens = token_counts.iter().sum::<i32>() + tools_description_tokens;
     
-    let (_, extra_budget_offset_perc) = get_model_token_params(model_name);
+    let (_, extra_budget_offset_perc) = get_model_token_params(model_id);
     
     let extra_budget = (n_ctx as f32 * extra_budget_offset_perc) as usize;
     let tokens_limit = n_ctx.saturating_sub(max_new_tokens).saturating_sub(extra_budget) as i32;
@@ -77,7 +77,7 @@ fn compress_message_at_index(
     token_counts: &mut Vec<i32>,
     token_cache: &mut TokenCountCache,
     index: usize,
-    model_name: &str,
+    model_id: &str,
 ) -> Result<i32, String> {
     let role = &mutable_messages[index].role;
     let new_summary = if role == "context_file" {
@@ -115,7 +115,7 @@ fn compress_message_at_index(
     
     mutable_messages[index].content = ChatContent::SimpleText(new_summary);
     token_cache.invalidate(&mutable_messages[index]);
-    let (extra_tokens_per_message, _) = get_model_token_params(model_name);
+    let (extra_tokens_per_message, _) = get_model_token_params(model_id);
     // Recalculate token usage after compression using the cache
     token_counts[index] = token_cache.get_token_count(&mutable_messages[index], t.tokenizer.clone(), extra_tokens_per_message)?;
     Ok(token_counts[index])
@@ -132,14 +132,14 @@ fn process_compression_stage(
     start_idx: usize,
     end_idx: usize,
     stage_name: &str,
-    model_name: &str,
+    model_id: &str,
     message_filter: impl Fn(usize, &ChatMessage, i32) -> bool,
     sort_by_size: bool,
 ) -> Result<(i32, i32, bool), String> {
     tracing::info!("n_ctx={n_ctx}, max_new_tokens={max_new_tokens}");
     tracing::info!("STAGE: {}", stage_name);
     let (mut occupied_tokens, tokens_limit) = 
-        recalculate_token_limits(token_counts, tools_description_tokens, n_ctx, max_new_tokens, model_name);
+        recalculate_token_limits(token_counts, tools_description_tokens, n_ctx, max_new_tokens, model_id);
     let mut budget_reached = false;
     let messages_len = mutable_messages.len();
     let end = std::cmp::min(end_idx, messages_len);
@@ -164,7 +164,7 @@ fn process_compression_stage(
     }
     
     for (i, original_tokens) in indices_to_process {
-        compress_message_at_index(t, mutable_messages, token_counts, token_cache, i, model_name)?;
+        compress_message_at_index(t, mutable_messages, token_counts, token_cache, i, model_id)?;
         let token_delta = token_counts[i] - original_tokens;
         occupied_tokens += token_delta;
         tracing::info!("Compressed message at index {}: token count {} -> {} (saved {})", 
@@ -488,7 +488,7 @@ pub fn fix_and_limit_messages_history(
     sampling_parameters_to_patch: &mut SamplingParameters,
     n_ctx: usize,
     tools_description: Option<String>,
-    model_name: &str,
+    model_id: &str,
 ) -> Result<(Vec<ChatMessage>, CompressionStrength), String> {
     let start_time = Instant::now();
     
@@ -516,7 +516,7 @@ pub fn fix_and_limit_messages_history(
         16000
     );
 
-    let (extra_tokens_per_message, _) = get_model_token_params(model_name);
+    let (extra_tokens_per_message, _) = get_model_token_params(model_id);
     let mut token_cache = TokenCountCache::new();
     let mut token_counts: Vec<i32> = Vec::with_capacity(mutable_messages.len());
     for msg in &mutable_messages {
@@ -532,7 +532,7 @@ pub fn fix_and_limit_messages_history(
     tracing::info!("Calculated undroppable_msg_n = {} (last user message)", undroppable_msg_n);
     let outlier_threshold = 1000;
     let (mut occupied_tokens, mut tokens_limit) = 
-        recalculate_token_limits(&token_counts, tools_description_tokens, n_ctx, sampling_parameters_to_patch.max_new_tokens, model_name);
+        recalculate_token_limits(&token_counts, tools_description_tokens, n_ctx, sampling_parameters_to_patch.max_new_tokens, model_id);
     tracing::info!("Before compression: occupied_tokens={} vs tokens_limit={}", occupied_tokens, tokens_limit);
     
     // STAGE 1: Compress ContextFile messages before the last user message
@@ -550,7 +550,7 @@ pub fn fix_and_limit_messages_history(
             1, // Start from index 1 to preserve the initial message
             stage1_end,
             "Stage 1: Compressing ContextFile messages before the last user message",
-            model_name,
+            model_id,
             |i, msg, _| i != 0 && msg.role == "context_file" && !preserve_in_later_stages[i],
             true
         )?;
@@ -579,7 +579,7 @@ pub fn fix_and_limit_messages_history(
             1, // Start from index 1 to preserve the initial message
             stage2_end,
             "Stage 2: Compressing Tool Result messages before the last user message",
-            model_name,
+            model_id,
             |i, msg, _| i != 0 && msg.role == "tool",
             true
         )?;
@@ -608,7 +608,7 @@ pub fn fix_and_limit_messages_history(
             1, // Start from index 1 to preserve the initial message
             stage3_end,
             "Stage 3: Compressing outlier messages before the last user message",
-            model_name,
+            model_id,
             |i, msg, token_count| {
                 i != 0 && 
                 token_count > outlier_threshold && 
@@ -717,7 +717,7 @@ pub fn fix_and_limit_messages_history(
             undroppable_msg_n,
             msg_len,
             "Stage 5: Compressing ContextFile messages after the last user message (last resort)",
-            model_name,
+            model_id,
             |_, msg, _| msg.role == "context_file",
             true
         )?;
@@ -744,7 +744,7 @@ pub fn fix_and_limit_messages_history(
             undroppable_msg_n,
             msg_len,
             "Stage 6: Compressing Tool Result messages after the last user message (last resort)",
-            model_name,
+            model_id,
             |_, msg, _| msg.role == "tool",
             true
         )?;
@@ -772,7 +772,7 @@ pub fn fix_and_limit_messages_history(
             undroppable_msg_n,
             msg_len,
             "Stage 7: Compressing outlier messages in the last conversation block (last resort)",
-            model_name,
+            model_id,
             |i, msg, token_count| {
                 i >= undroppable_msg_n &&
                 token_count > outlier_threshold && 
@@ -791,7 +791,7 @@ pub fn fix_and_limit_messages_history(
 
     remove_invalid_tool_calls_and_tool_calls_results(&mut mutable_messages);
     let (occupied_tokens, tokens_limit) =
-        recalculate_token_limits(&token_counts, tools_description_tokens, n_ctx, sampling_parameters_to_patch.max_new_tokens, model_name);
+        recalculate_token_limits(&token_counts, tools_description_tokens, n_ctx, sampling_parameters_to_patch.max_new_tokens, model_id);
     tracing::info!("Final occupied_tokens={} <= tokens_limit={}", occupied_tokens, tokens_limit);
 
     // If we're still over the limit after all compression stages, return an error
@@ -1248,14 +1248,22 @@ mod tests {
 
     impl HasTokenizerAndEot {
         fn mock() -> Arc<Self> {
-            use std::sync::RwLock;
             use tokenizers::Tokenizer;
             use tokenizers::models::wordpiece::WordPiece;
-            let wordpiece = WordPiece::default();
+            use std::collections::HashMap;
+
+            let mut vocab = HashMap::new();
+            vocab.insert("[UNK]".to_string(), 0);
+
+            let wordpiece = WordPiece::builder()
+                .vocab(vocab)
+                .unk_token("[UNK]".to_string())
+                .build()
+                .unwrap();
             let mock_tokenizer = Tokenizer::new(wordpiece);
 
             Arc::new(Self {
-                tokenizer: Arc::new(RwLock::new(mock_tokenizer)),
+                tokenizer: Some(Arc::new(mock_tokenizer)),
                 eot: "".to_string(),
                 eos: "".to_string(),
                 context_format: "".to_string(),
diff --git a/refact-agent/engine/src/scratchpads/chat_utils_prompts.rs b/refact-agent/engine/src/scratchpads/chat_utils_prompts.rs
index 2a310c656..069b7bca1 100644
--- a/refact-agent/engine/src/scratchpads/chat_utils_prompts.rs
+++ b/refact-agent/engine/src/scratchpads/chat_utils_prompts.rs
@@ -19,12 +19,7 @@ pub async fn get_default_system_prompt(
     let mut error_log = Vec::new();
     let tconfig = crate::yaml_configs::customization_loader::load_customization(gcx.clone(), true, &mut error_log).await;
     for e in error_log.iter() {
-        tracing::error!(
-            "{}:{} {:?}",
-            crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-            e.error_line,
-            e.error_msg,
-        );
+        tracing::error!("{e}");
     }
     let prompt_key = match chat_mode {
         ChatMode::NO_TOOLS => "default",
diff --git a/refact-agent/engine/src/scratchpads/code_completion_fim.rs b/refact-agent/engine/src/scratchpads/code_completion_fim.rs
index 2ef1f5a26..90f46367f 100644
--- a/refact-agent/engine/src/scratchpads/code_completion_fim.rs
+++ b/refact-agent/engine/src/scratchpads/code_completion_fim.rs
@@ -39,7 +39,7 @@ pub struct FillInTheMiddleScratchpad {
 
 impl FillInTheMiddleScratchpad {
     pub fn new(
-        tokenizer: Arc<StdRwLock<Tokenizer>>,
+        tokenizer: Option<Arc<Tokenizer>>,
         post: &CodeCompletionPost,
         order: String,
         cache_arc: Arc<StdRwLock<completion_cache::CompletionCache>>,
@@ -91,12 +91,14 @@ impl ScratchpadAbstract for FillInTheMiddleScratchpad {
         self.t.eos = patch.get("eos").and_then(|x| x.as_str()).unwrap_or("").to_string();
         self.t.context_format = patch.get("context_format").and_then(|x| x.as_str()).unwrap_or_default().to_string();
         self.t.rag_ratio = patch.get("rag_ratio").and_then(|x| x.as_f64()).unwrap_or(0.5);
-        self.t.assert_one_token(&self.fim_prefix.as_str())?;
-        self.t.assert_one_token(&self.fim_suffix.as_str())?;
-        self.t.assert_one_token(&self.fim_middle.as_str())?;
-        self.t.assert_one_token(&self.t.eot.as_str())?;
-        if !self.t.eos.is_empty() {
-            self.t.assert_one_token(&self.t.eos.as_str())?;
+        if self.t.tokenizer.is_some() {
+            self.t.assert_one_token(&self.fim_prefix.as_str())?;
+            self.t.assert_one_token(&self.fim_suffix.as_str())?;
+            self.t.assert_one_token(&self.fim_middle.as_str())?;
+            self.t.assert_one_token(&self.t.eot.as_str())?;
+            if !self.t.eos.is_empty() {
+                self.t.assert_one_token(&self.t.eos.as_str())?;
+            }
         }
         Ok(())
     }
diff --git a/refact-agent/engine/src/scratchpads/code_completion_replace.rs b/refact-agent/engine/src/scratchpads/code_completion_replace.rs
index 2e232c85c..7acf0ab2f 100644
--- a/refact-agent/engine/src/scratchpads/code_completion_replace.rs
+++ b/refact-agent/engine/src/scratchpads/code_completion_replace.rs
@@ -3,6 +3,7 @@ use crate::at_commands::at_commands::AtCommandsContext;
 use crate::call_validation::{
     ChatContent, ChatMessage, CodeCompletionPost, CursorPosition, SamplingParameters,
 };
+use crate::caps::resolve_completion_model;
 use crate::completion_cache;
 use crate::global_context::GlobalContext;
 use crate::scratchpad_abstract::{FinishReason, HasTokenizerAndEot, ScratchpadAbstract};
@@ -201,14 +202,16 @@ async fn prepare_subblock(
     if let Some(symbol) = get_cursor_symbol_from_doc(ast_service.clone(), cpath, cursor_pos).await {
         let min_rows_to_include = 2;
         for idx in symbol.full_line1().saturating_sub(1)..symbol.full_line2() + 1 {
-            let line = file_text.line(idx).to_string();
-            tokens_used += tokenizer.count_tokens(&line).unwrap_or(0) as usize;
-            if idx < cursor_pos.line as usize {
-                subblock.before_lines.push(line);
-            } else if idx > cursor_pos.line as usize {
-                subblock.after_lines_extra.push(line.clone());
-                if tokens_used <= max_tokens || subblock.after_lines.len() < min_rows_to_include {
-                    subblock.after_lines.push(line);
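+            // a symbol's line range can extend past the end of the file, so each
+            // index is bound-checked before reading from the rope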
+            if idx < file_text.len_lines() {
+                let line = file_text.line(idx).to_string();
+                tokens_used += tokenizer.count_tokens(&line).unwrap_or(0) as usize;
+                if idx < cursor_pos.line as usize {
+                    subblock.before_lines.push(line);
+                } else if idx > cursor_pos.line as usize {
+                    subblock.after_lines_extra.push(line.clone());
+                    if tokens_used <= max_tokens || subblock.after_lines.len() < min_rows_to_include {
+                        subblock.after_lines.push(line);
+                    }
                 }
             }
         }
@@ -556,7 +559,7 @@ pub struct CodeCompletionReplaceScratchpad {
 
 impl CodeCompletionReplaceScratchpad {
     pub fn new(
-        tokenizer: Arc<StdRwLock<Tokenizer>>,
+        tokenizer: Option<Arc<Tokenizer>>,
         post: &CodeCompletionPost,
         cache_arc: Arc<StdRwLock<completion_cache::CompletionCache>>,
         tele_storage: Arc<StdRwLock<telemetry_structs::Storage>>,
@@ -646,17 +649,19 @@ impl ScratchpadAbstract for CodeCompletionReplaceScratchpad {
             .get("rag_ratio")
             .and_then(|x| x.as_f64())
             .unwrap_or(0.5);
-        if !self.token_bos.is_empty() {
-            self.t.assert_one_token(&self.token_bos.as_str())?;
-        }
-        if !self.token_esc.is_empty() {
-            self.t.assert_one_token(&self.token_esc.as_str())?;
-        }
-        if !self.t.eot.is_empty() {
-            self.t.assert_one_token(&self.t.eot.as_str())?;
-        }
-        if !self.t.eos.is_empty() {
-            self.t.assert_one_token(&self.t.eos.as_str())?;
+        if self.t.tokenizer.is_some() {
+            if !self.token_bos.is_empty() {
+                self.t.assert_one_token(&self.token_bos.as_str())?;
+            }
+            if !self.token_esc.is_empty() {
+                self.t.assert_one_token(&self.token_esc.as_str())?;
+            }
+            if !self.t.eot.is_empty() {
+                self.t.assert_one_token(&self.t.eot.as_str())?;
+            }
+            if !self.t.eos.is_empty() {
+                self.t.assert_one_token(&self.t.eos.as_str())?;
+            }
         }
         Ok(())
     }
@@ -843,7 +848,7 @@ pub struct CodeCompletionReplacePassthroughScratchpad {
 
 impl CodeCompletionReplacePassthroughScratchpad {
     pub fn new(
-        tokenizer: Arc<StdRwLock<Tokenizer>>,
+        tokenizer: Option<Arc<Tokenizer>>,
         post: &CodeCompletionPost,
         cache_arc: Arc<StdRwLock<completion_cache::CompletionCache>>,
         tele_storage: Arc<StdRwLock<telemetry_structs::Storage>>,
@@ -891,10 +896,11 @@ impl ScratchpadAbstract for CodeCompletionReplacePassthroughScratchpad {
         ccx: Arc<AMutex<AtCommandsContext>>,
         sampling_parameters_to_patch: &mut SamplingParameters,
     ) -> Result<String, String> {
-        let (n_ctx, _gcx) = {
+        let (n_ctx, gcx) = {
             let ccx_locked = ccx.lock().await;
             (ccx_locked.n_ctx, ccx_locked.global_context.clone())
         };
+        let caps = gcx.read().await.caps.clone().ok_or_else(|| "No caps".to_string())?;
         let completion_t0 = Instant::now();
         let use_rag = self.t.rag_ratio > 0.0 && self.post.use_ast && self.ast_service.is_some();
         sampling_parameters_to_patch.max_new_tokens = MAX_NEW_TOKENS;
@@ -1005,8 +1011,9 @@ impl ScratchpadAbstract for CodeCompletionReplacePassthroughScratchpad {
             ..Default::default()
         });
 
+        let model = resolve_completion_model(caps.clone(), &self.post.model, true)?;
         let json_messages = &serde_json::to_string(&json!({
-            "messages":  messages.iter().map(|x| { x.into_value(&None) }).collect::>(),
+            "messages":  messages.iter().map(|x| { x.into_value(&None, &model.base.id) }).collect::>(),
         }))
         .unwrap();
         let prompt = format!("PASSTHROUGH {json_messages}").to_string();
diff --git a/refact-agent/engine/src/scratchpads/mod.rs b/refact-agent/engine/src/scratchpads/mod.rs
index 0b41eb8e3..7ac1b0831 100644
--- a/refact-agent/engine/src/scratchpads/mod.rs
+++ b/refact-agent/engine/src/scratchpads/mod.rs
@@ -1,7 +1,6 @@
 use std::sync::Arc;
 use std::sync::RwLock as StdRwLock;
 use tokio::sync::{Mutex as AMutex, RwLock as ARwLock};
-use tokenizers::Tokenizer;
 
 pub mod code_completion_fim;
 pub mod chat_generic;
@@ -20,12 +19,13 @@ mod completon_rag;
 use crate::ast::ast_indexer_thread::AstIndexService;
 use crate::call_validation::{ChatMessage, CodeCompletionPost};
 use crate::call_validation::ChatPost;
+use crate::caps::ChatModelRecord;
+use crate::caps::CompletionModelRecord;
 use crate::global_context::GlobalContext;
-use crate::caps::CodeAssistantCaps;
 use crate::scratchpad_abstract::ScratchpadAbstract;
 use crate::completion_cache;
 use crate::telemetry::telemetry_structs;
-use crate::cached_tokenizers;
+use crate::tokens;
 
 
 fn verify_has_send<T: Send>(_x: &T) {}
@@ -33,66 +33,58 @@ fn verify_has_send<T: Send>(_x: &T) {}
 
 pub async fn create_code_completion_scratchpad(
     global_context: Arc<ARwLock<GlobalContext>>,
-    caps: Arc<StdRwLock<CodeAssistantCaps>>,
-    model_name_for_tokenizer: String,
+    model_rec: &CompletionModelRecord,
     post: &CodeCompletionPost,
-    scratchpad_name: &str,
-    scratchpad_patch: &serde_json::Value,
     cache_arc: Arc<StdRwLock<completion_cache::CompletionCache>>,
     tele_storage: Arc<StdRwLock<telemetry_structs::Storage>>,
     ast_module: Option<Arc<AMutex<AstIndexService>>>,
 ) -> Result<Box<dyn ScratchpadAbstract>, String> {
     let mut result: Box<dyn ScratchpadAbstract>;
-    let tokenizer_arc: Arc<StdRwLock<Tokenizer>> = cached_tokenizers::cached_tokenizer(caps, global_context.clone(), model_name_for_tokenizer).await?;
-    if scratchpad_name == "FIM-PSM" {
+    let tokenizer_arc = crate::tokens::cached_tokenizer(global_context.clone(), &model_rec.base).await?;
+    if model_rec.scratchpad == "FIM-PSM" {
         result = Box::new(code_completion_fim::FillInTheMiddleScratchpad::new(
             tokenizer_arc, &post, "PSM".to_string(), cache_arc, tele_storage, ast_module, global_context.clone()
         ))
-    } else if scratchpad_name == "FIM-SPM" {
+    } else if model_rec.scratchpad == "FIM-SPM" {
         result = Box::new(code_completion_fim::FillInTheMiddleScratchpad::new(
             tokenizer_arc, &post, "SPM".to_string(), cache_arc, tele_storage, ast_module, global_context.clone()
         ))
-    } else if scratchpad_name == "REPLACE" {
+    } else if model_rec.scratchpad == "REPLACE" {
         result = Box::new(code_completion_replace::CodeCompletionReplaceScratchpad::new(
             tokenizer_arc, &post, cache_arc, tele_storage, ast_module, global_context.clone()
         ))
-    } else if scratchpad_name == "REPLACE_PASSTHROUGH" {
+    } else if model_rec.scratchpad == "REPLACE_PASSTHROUGH" {
         result = Box::new(code_completion_replace::CodeCompletionReplacePassthroughScratchpad::new(
             tokenizer_arc, &post, cache_arc, tele_storage, ast_module, global_context.clone()
         ))
     } else {
-        return Err(format!("This rust binary doesn't have code completion scratchpad \"{}\" compiled in", scratchpad_name));
+        return Err(format!("This rust binary doesn't have code completion scratchpad \"{}\" compiled in", model_rec.scratchpad));
     }
-    result.apply_model_adaptation_patch(scratchpad_patch, false, false).await?;
+    result.apply_model_adaptation_patch(&model_rec.scratchpad_patch, false, false).await?;
     verify_has_send(&result);
     Ok(result)
 }
 
 pub async fn create_chat_scratchpad(
     global_context: Arc<ARwLock<GlobalContext>>,
-    caps: Arc<StdRwLock<CodeAssistantCaps>>,
-    model_name_for_tokenizer: String,
     post: &mut ChatPost,
     messages: &Vec<ChatMessage>,
     prepend_system_prompt: bool,
-    scratchpad_name: &str,
-    scratchpad_patch: &serde_json::Value,
+    model_rec: &ChatModelRecord,
     allow_at: bool,
-    supports_tools: bool,
-    supports_clicks: bool,
 ) -> Result<Box<dyn ScratchpadAbstract>, String> {
     let mut result: Box<dyn ScratchpadAbstract>;
-    let tokenizer_arc = cached_tokenizers::cached_tokenizer(caps, global_context.clone(), model_name_for_tokenizer).await?;
-    if scratchpad_name == "CHAT-GENERIC" {
+    let tokenizer_arc = tokens::cached_tokenizer(global_context.clone(), &model_rec.base).await?;
+    if model_rec.scratchpad == "CHAT-GENERIC" {
         result = Box::new(chat_generic::GenericChatScratchpad::new(
             tokenizer_arc.clone(), post, messages, prepend_system_prompt, allow_at
         ));
-    } else if scratchpad_name == "PASSTHROUGH" {
+    } else if model_rec.scratchpad == "PASSTHROUGH" {
         result = Box::new(chat_passthrough::ChatPassthrough::new(
-            tokenizer_arc.clone(), post, messages, prepend_system_prompt, allow_at, supports_tools, supports_clicks
+            tokenizer_arc.clone(), post, messages, prepend_system_prompt, allow_at, model_rec.supports_tools, model_rec.supports_clicks
         ));
     } else {
-        return Err(format!("This rust binary doesn't have chat scratchpad \"{}\" compiled in", scratchpad_name));
+        return Err(format!("This rust binary doesn't have chat scratchpad \"{}\" compiled in", model_rec.scratchpad));
     }
     let mut exploration_tools: bool = false;
     let mut agentic_tools: bool = false;
@@ -111,7 +103,7 @@ pub async fn create_chat_scratchpad(
             }
         }
     }
-    result.apply_model_adaptation_patch(scratchpad_patch, exploration_tools, agentic_tools).await?;
+    result.apply_model_adaptation_patch(&model_rec.scratchpad_patch, exploration_tools, agentic_tools).await?;
     verify_has_send(&result);
     Ok(result)
 }
diff --git a/refact-agent/engine/src/scratchpads/multimodality.rs b/refact-agent/engine/src/scratchpads/multimodality.rs
index 5365c90e0..d03d413e9 100644
--- a/refact-agent/engine/src/scratchpads/multimodality.rs
+++ b/refact-agent/engine/src/scratchpads/multimodality.rs
@@ -1,9 +1,10 @@
 use serde::{Deserialize, Deserializer, Serialize};
-use std::sync::{Arc, RwLock, RwLockReadGuard};
+use std::sync::Arc;
 use serde_json::{json, Value};
 use tokenizers::Tokenizer;
 use crate::call_validation::{ChatContent, ChatMessage, ChatToolCall};
-use crate::scratchpads::scratchpad_utils::{calculate_image_tokens_openai, count_tokens as count_tokens_simple_text, image_reader_from_b64string, parse_image_b64_from_image_url_openai};
+use crate::scratchpads::scratchpad_utils::{calculate_image_tokens_openai, image_reader_from_b64string, parse_image_b64_from_image_url_openai};
+use crate::tokens::count_text_tokens;
 
 
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
@@ -76,13 +77,9 @@ impl MultimodalElement {
         })
     }
 
-    pub fn count_tokens(&self, tokenizer: Option<&RwLockReadGuard<Tokenizer>>, style: &Option<String>) -> Result<i32, String> {
+    pub fn count_tokens(&self, tokenizer: Option<Arc<Tokenizer>>, style: &Option<String>) -> Result<i32, String> {
         if self.is_text() {
-            if let Some(tokenizer) = tokenizer {
-                Ok(count_tokens_simple_text(&tokenizer, &self.m_content) as i32)
-            } else {
-                return Err("count_tokens() received no tokenizer".to_string());
-            }
+            Ok(count_text_tokens(tokenizer, &self.m_content)? as i32)
         } else if self.is_image() {
             let style = style.clone().unwrap_or("openai".to_string());
             match style.as_str() {
@@ -157,6 +154,13 @@ impl ChatContentRaw {
             }
         }
     }
+
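+    // lets ChatMessage::into_value() skip the "content" field entirely for
+    // providers that reject empty strings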
+    pub fn is_empty(&self) -> bool {
+        match self {
+            ChatContentRaw::SimpleText(text) => text.is_empty(),
+            ChatContentRaw::Multimodal(elements) => elements.is_empty(),
+        }
+    }
 }
 
 impl ChatContent {
@@ -171,7 +175,7 @@ impl ChatContent {
         }
     }
 
-    pub fn size_estimate(&self, tokenizer: Arc<RwLock<Tokenizer>>, style: &Option<String>) -> usize {
+    pub fn size_estimate(&self, tokenizer: Option<Arc<Tokenizer>>, style: &Option<String>) -> usize {
         match self {
             ChatContent::SimpleText(text) => text.len(),
             ChatContent::Multimodal(_elements) => {
@@ -181,12 +185,11 @@ impl ChatContent {
         }
     }
 
-    pub fn count_tokens(&self, tokenizer: Arc<RwLock<Tokenizer>>, style: &Option<String>) -> Result<i32, String> {
-        let tokenizer_lock = tokenizer.read().unwrap();
+    pub fn count_tokens(&self, tokenizer: Option<Arc<Tokenizer>>, style: &Option<String>) -> Result<i32, String> {
         match self {
-            ChatContent::SimpleText(text) => Ok(count_tokens_simple_text(&tokenizer_lock, text) as i32),
+            ChatContent::SimpleText(text) => Ok(count_text_tokens(tokenizer, text)? as i32),
             ChatContent::Multimodal(elements) => elements.iter()
-                .map(|e|e.count_tokens(Some(&tokenizer_lock), style))
+                .map(|e|e.count_tokens(tokenizer.clone(), style))
                 .collect::<Result<Vec<_>, _>>()
                 .map(|counts| counts.iter().sum()),
         }
@@ -254,14 +257,19 @@ impl ChatMessage {
         }
     }
 
-    pub fn into_value(&self, style: &Option<String>) -> Value {
+    pub fn into_value(&self, style: &Option<String>, model_id: &str) -> Value {
         let mut dict = serde_json::Map::new();
         let chat_content_raw = self.content.into_raw(style);
-
         dict.insert("role".to_string(), Value::String(self.role.clone()));
-        dict.insert("content".to_string(), json!(chat_content_raw));
-        dict.insert("tool_calls".to_string(), json!(self.tool_calls.clone()));
-        dict.insert("tool_call_id".to_string(), Value::String(self.tool_call_id.clone()));
+        if model_supports_empty_strings(model_id) || !chat_content_raw.is_empty() {
+            dict.insert("content".to_string(), json!(chat_content_raw));
+        }
+        if let Some(tool_calls) = self.tool_calls.clone() {
+            dict.insert("tool_calls".to_string(), json!(tool_calls));
+        }
+        if !self.tool_call_id.is_empty() {
+            dict.insert("tool_call_id".to_string(), Value::String(self.tool_call_id.clone()));
+        }
         if let Some(thinking_blocks) = self.thinking_blocks.clone() {
             dict.insert("thinking_blocks".to_string(), json!(thinking_blocks));
         }
@@ -312,3 +320,8 @@ impl<'de> Deserialize<'de> for ChatMessage {
         })
     }
 }
+
+/// Whether the provider API accepts message fields that are empty strings (Gemini does not)
+fn model_supports_empty_strings(model_id: &str) -> bool {
+    !model_id.starts_with("google_gemini/")
+}
\ No newline at end of file
diff --git a/refact-agent/engine/src/scratchpads/passthrough_convert_messages.rs b/refact-agent/engine/src/scratchpads/passthrough_convert_messages.rs
index eae6727c9..c45dbdfad 100644
--- a/refact-agent/engine/src/scratchpads/passthrough_convert_messages.rs
+++ b/refact-agent/engine/src/scratchpads/passthrough_convert_messages.rs
@@ -4,7 +4,7 @@ use tracing::{error, warn};
 use crate::call_validation::{ChatContent, ChatMessage, ContextFile, DiffChunk};
 
 
-pub fn convert_messages_to_openai_format(messages: Vec<ChatMessage>, style: &Option<String>) -> Vec<Value> {
+pub fn convert_messages_to_openai_format(messages: Vec<ChatMessage>, style: &Option<String>, model_id: &str) -> Vec<Value> {
     let mut results = vec![];
     let mut delay_images = vec![];
 
@@ -26,28 +26,28 @@ pub fn convert_messages_to_openai_format(messages: Vec<ChatMessage>, style: &Opt
                     };
                     let mut msg_cloned = msg.clone();
                     msg_cloned.content = ChatContent::SimpleText(text);
-                    results.push(msg_cloned.into_value(&style));
+                    results.push(msg_cloned.into_value(&style, model_id));
                     if !images.is_empty() {
                         let msg_img = ChatMessage {
                             role: "user".to_string(),
                             content: ChatContent::Multimodal(images.into_iter().cloned().collect()),
                             ..Default::default()
                         };
-                        delay_images.push(msg_img.into_value(&style));
+                        delay_images.push(msg_img.into_value(&style, model_id));
                     }
                 },
                 ChatContent::SimpleText(_) => {
-                    results.push(msg.into_value(&style));
+                    results.push(msg.into_value(&style, model_id));
                 }
             }
 
         } else if msg.role == "assistant" || msg.role == "system" {
             flush_delayed_images(&mut results, &mut delay_images);
-            results.push(msg.into_value(&style));
+            results.push(msg.into_value(&style, model_id));
 
         } else if msg.role == "user" {
             flush_delayed_images(&mut results, &mut delay_images);
-            results.push(msg.into_value(&style));
+            results.push(msg.into_value(&style, model_id));
 
         } else if msg.role == "diff" {
             let extra_message = match serde_json::from_str::<Vec<DiffChunk>>(&msg.content.content_text_only()) {
@@ -66,14 +66,14 @@ pub fn convert_messages_to_openai_format(messages: Vec<ChatMessage>, style: &Opt
                 tool_call_id: msg.tool_call_id.clone(),
                 ..Default::default()
             };
-            results.push(tool_msg.into_value(&style));
+            results.push(tool_msg.into_value(&style, model_id));
 
         } else if msg.role == "plain_text" || msg.role == "cd_instruction" {
             flush_delayed_images(&mut results, &mut delay_images);
             results.push(ChatMessage::new(
                 "user".to_string(),
                 msg.content.content_text_only(),
-            ).into_value(&style));
+            ).into_value(&style, model_id));
 
         } else if msg.role == "context_file" {
             flush_delayed_images(&mut results, &mut delay_images);
@@ -87,7 +87,7 @@ pub fn convert_messages_to_openai_format(messages: Vec<ChatMessage>, style: &Opt
                                     context_file.line1,
                                     context_file.line2,
                                     context_file.file_content),
-                        ).into_value(&style));
+                        ).into_value(&style, model_id));
                     }
                 },
                 Err(e) => { error!("error parsing context file: {}", e); }
@@ -190,7 +190,7 @@ mod tests {
         let roles_out_expected = expected_output.iter().map(|x| x.get("role").unwrap().as_str().unwrap().to_string()).collect::<Vec<_>>();
 
         let style = Some("openai".to_string());
-        let output = convert_messages_to_openai_format(messages, &style);
+        let output = convert_messages_to_openai_format(messages, &style, "Refact/gpt-4o");
 
         // println!("OUTPUT: {:#?}", output);
         let roles_out = output.iter().map(|x| x.get("role").unwrap().as_str().unwrap().to_string()).collect::<Vec<_>>();
diff --git a/refact-agent/engine/src/scratchpads/scratchpad_utils.rs b/refact-agent/engine/src/scratchpads/scratchpad_utils.rs
index 4d535c4bb..eb5a797b0 100644
--- a/refact-agent/engine/src/scratchpads/scratchpad_utils.rs
+++ b/refact-agent/engine/src/scratchpads/scratchpad_utils.rs
@@ -2,7 +2,6 @@ use std::io::Cursor;
 use image::ImageReader;
 use regex::Regex;
 use serde_json::Value;
-use tokenizers::Tokenizer;
 use crate::call_validation::{ChatToolCall, ContextFile};
 use crate::postprocessing::pp_context_files::RESERVE_FOR_QUESTION_AND_FOLLOWUP;
 
@@ -34,16 +33,6 @@ impl HasRagResults {
     }
 }
 
-pub fn count_tokens(
-    tokenizer: &Tokenizer,
-    text: &str,
-) -> usize {
-    match tokenizer.encode_fast(text, false) {
-        Ok(tokens) => tokens.len(),
-        Err(_) => 0,
-    }
-}
-
 pub fn parse_image_b64_from_image_url_openai(image_url: &str) -> Option<(String, String, String)> {
     let re = Regex::new(r"data:(image/(png|jpeg|jpg|webp|gif));base64,([A-Za-z0-9+/=]+)").unwrap();
     re.captures(image_url).and_then(|captures| {
diff --git a/refact-agent/engine/src/scratchpads/token_count_cache.rs b/refact-agent/engine/src/scratchpads/token_count_cache.rs
index 327eef2ac..936e4d766 100644
--- a/refact-agent/engine/src/scratchpads/token_count_cache.rs
+++ b/refact-agent/engine/src/scratchpads/token_count_cache.rs
@@ -1,6 +1,5 @@
 use std::collections::HashMap;
 use std::sync::Arc;
-use std::sync::RwLock;
 use tokenizers::Tokenizer;
 use crate::call_validation::ChatMessage;
 
@@ -28,7 +27,7 @@ impl TokenCountCache {
     pub fn get_token_count(
         &mut self,
         msg: &ChatMessage,
-        tokenizer: Arc<RwLock<Tokenizer>>,
+        tokenizer: Option<Arc<Tokenizer>>,
         extra_tokens_per_message: i32,
     ) -> Result<i32, String> {
         let key = Self::cache_key(msg);
diff --git a/refact-agent/engine/src/subchat.rs b/refact-agent/engine/src/subchat.rs
index 2531da670..f5e301ec4 100644
--- a/refact-agent/engine/src/subchat.rs
+++ b/refact-agent/engine/src/subchat.rs
@@ -5,11 +5,12 @@ use tokio::sync::Mutex as AMutex;
 use serde_json::{json, Value};
 use tracing::{error, info, warn};
 
+use crate::caps::resolve_chat_model;
+use crate::caps::ChatModelRecord;
 use crate::tools::tools_description::{tools_merged_and_filtered, tool_description_list_from_yaml};
 use crate::at_commands::at_commands::AtCommandsContext;
 use crate::call_validation::{SamplingParameters, PostprocessSettings, ChatPost, ChatMessage, ChatUsage, ChatToolCall, ReasoningEffort};
-use crate::global_context::{GlobalContext, try_load_caps_quickly_if_not_present, is_metadata_supported};
-use crate::http::routers::v1::chat::lookup_chat_scratchpad;
+use crate::global_context::{GlobalContext, try_load_caps_quickly_if_not_present};
 use crate::scratchpad_abstract::ScratchpadAbstract;
 use crate::scratchpads::multimodality::chat_content_raw_from_value;
 use crate::yaml_configs::customization_loader::load_customization;
@@ -21,7 +22,7 @@ const MAX_NEW_TOKENS: usize = 4096;
 pub async fn create_chat_post_and_scratchpad(
     global_context: Arc>,
     ccx: Arc<AMutex<AtCommandsContext>>,
-    model_name: &str,
+    model_id: &str,
     messages: Vec<&ChatMessage>,
     temperature: Option<f32>,
     max_new_tokens: usize,
@@ -32,7 +33,7 @@ pub async fn create_chat_post_and_scratchpad(
     tool_choice: Option<String>,
     only_deterministic_messages: bool,
     _should_execute_remotely: bool,
-) -> Result<(ChatPost, Box<dyn ScratchpadAbstract>), String> {
+) -> Result<(ChatPost, Box<dyn ScratchpadAbstract>, Arc<ChatModelRecord>), String> {
     let caps = try_load_caps_quickly_if_not_present(
         global_context.clone(), 0,
     ).await.map_err(|e| {
@@ -42,12 +43,7 @@ pub async fn create_chat_post_and_scratchpad(
     let mut error_log = Vec::new();
     let tconfig = load_customization(global_context.clone(), true, &mut error_log).await;
     for e in error_log.iter() {
-        tracing::error!(
-            "{}:{} {:?}",
-            crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-            e.error_line,
-            e.error_msg,
-        );
+        tracing::error!("{e}");
     }
 
     let mut chat_post = ChatPost {
@@ -61,8 +57,7 @@ pub async fn create_chat_post_and_scratchpad(
             reasoning_effort,
             ..Default::default()  // TODO
         },
-        model: model_name.to_string(),
-        scratchpad: "".to_string(),
+        model: model_id.to_string(),
         stream: Some(false),
         temperature,
         n: Some(n),
@@ -74,38 +69,29 @@ pub async fn create_chat_post_and_scratchpad(
         ..Default::default()
     };
 
-    let (model_name, scratchpad_name, scratchpad_patch, n_ctx, supports_tools, _supports_multimodality, supports_clicks) = lookup_chat_scratchpad(
-        caps.clone(),
-        &chat_post,
-    ).await?;
+    let model_rec = resolve_chat_model(caps, model_id)?;
 
-    if !supports_tools {
-        warn!("supports_tools is false");
+    if !model_rec.supports_tools {
+        tracing::warn!("supports_tools is false");
     }
 
-    chat_post.max_tokens = Some(n_ctx);
-    chat_post.scratchpad = scratchpad_name.clone();
+    chat_post.max_tokens = Some(model_rec.base.n_ctx);
 
     {
         let mut ccx_locked = ccx.lock().await;
-        ccx_locked.current_model = model_name.to_string();
+        ccx_locked.current_model = model_id.to_string();
     }
 
     let scratchpad = crate::scratchpads::create_chat_scratchpad(
         global_context.clone(),
-        caps,
-        model_name.to_string(),
         &mut chat_post,
         &messages.into_iter().cloned().collect::>(),
         prepend_system_prompt,
-        &scratchpad_name,
-        &scratchpad_patch,
+        &model_rec,
         false,
-        supports_tools,
-        supports_clicks,
     ).await?;
 
-    Ok((chat_post, scratchpad))
+    Ok((chat_post, scratchpad, model_rec))
 }
 
 #[allow(dead_code)]
@@ -116,16 +102,14 @@ async fn chat_interaction_stream() {
 async fn chat_interaction_non_stream(
     ccx: Arc<AMutex<AtCommandsContext>>,
     mut spad: Box<dyn ScratchpadAbstract>,
+    model_rec: &ChatModelRecord,
     prompt: &String,
     chat_post: &ChatPost,
 ) -> Result<Vec<Vec<ChatMessage>>, String> {
-    let meta = {
-        let gcx = ccx.lock().await.global_context.clone();
-        if is_metadata_supported(gcx).await {
-            Some(chat_post.meta.clone())
-        } else {
-            None
-        }
+    let meta = if model_rec.base.support_metadata {
+        Some(chat_post.meta.clone())
+    } else {
+        None
     };
     
     let t1 = std::time::Instant::now();
@@ -134,7 +118,7 @@ async fn chat_interaction_non_stream(
         &mut spad,
         "chat".to_string(),
         prompt,
-        chat_post.model.clone(),
+        &model_rec.base,
         &chat_post.parameters,   // careful: includes n
         chat_post.only_deterministic_messages,
         meta
@@ -234,6 +218,7 @@ async fn chat_interaction_non_stream(
 pub async fn chat_interaction(
     ccx: Arc<AMutex<AtCommandsContext>>,
     mut spad: Box<dyn ScratchpadAbstract>,
+    model_rec: &ChatModelRecord,
     chat_post: &mut ChatPost,
 ) -> Result<Vec<Vec<ChatMessage>>, String> {
     let prompt = spad.prompt(ccx.clone(), &mut chat_post.parameters).await?;
@@ -244,6 +229,7 @@ pub async fn chat_interaction(
     Ok(chat_interaction_non_stream(
         ccx.clone(),
         spad,
+        model_rec,
         &prompt,
         chat_post,
     ).await?)
@@ -264,7 +250,7 @@ fn update_usage_from_messages(usage: &mut ChatUsage, messages: &Vec<Vec<ChatMessage>>,
-    model_name: &str,
+    model_id: &str,
     messages: Vec<ChatMessage>,
     tools_subset: Option<Vec<String>>,
     tool_choice: Option<String>,
@@ -295,16 +281,16 @@ pub async fn subchat_single(
         error!("Error loading compiled_in_tools: {:?}", e);
         vec![]
     });
-    let tools = tools_desclist.into_iter().filter(|x| x.is_supported_by(model_name)).map(|x|x.into_openai_style()).collect::<Vec<_>>();
+    let tools = tools_desclist.into_iter().filter(|x| x.is_supported_by(model_id)).map(|x|x.into_openai_style()).collect::<Vec<_>>();
     info!("tools_subset {:?}", tools_subset);
     info!("tools_turned_on_by_cmdline_set {:?}", tools_turned_on_by_cmdline_set);
     info!("tools_on_intersection {:?}", tools_on_intersection);
 
     let max_new_tokens = max_new_tokens.unwrap_or(MAX_NEW_TOKENS);
-    let (mut chat_post, spad) = create_chat_post_and_scratchpad(
+    let (mut chat_post, spad, model_rec) = create_chat_post_and_scratchpad(
         gcx.clone(),
         ccx.clone(),
-        model_name,
+        model_id,
         messages.iter().collect::<Vec<_>>(),
         temperature,
         max_new_tokens,
@@ -317,7 +303,7 @@ pub async fn subchat_single(
         should_execute_remotely,
     ).await?;
 
-    let chat_response_msgs = chat_interaction(ccx.clone(), spad, &mut chat_post).await?;
+    let chat_response_msgs = chat_interaction(ccx.clone(), spad, &model_rec, &mut chat_post).await?;
 
     let old_messages = messages.clone();
     // no need to remove user from old_messages here, because allow_at is false
@@ -355,7 +341,7 @@ pub async fn subchat_single(
 
 pub async fn subchat(
     ccx: Arc<AMutex<AtCommandsContext>>,
-    model_name: &str,
+    model_id: &str,
     messages: Vec<ChatMessage>,
     tools_subset: Vec<String>,
     wrap_up_depth: usize,
@@ -393,7 +379,7 @@ pub async fn subchat(
             }
             messages = subchat_single(
                 ccx.clone(),
-                model_name,
+                model_id,
                 messages.clone(),
                 Some(tools_subset.clone()),
                 Some("auto".to_string()),
@@ -416,7 +402,7 @@ pub async fn subchat(
         if !tool_calls.is_empty() {
             messages = subchat_single(
                 ccx.clone(),
-                model_name,
+                model_id,
                 messages,
                 Some(vec![]),
                 Some("none".to_string()),
@@ -435,7 +421,7 @@ pub async fn subchat(
     messages.push(ChatMessage::new("user".to_string(), wrap_up_prompt.to_string()));
     let choices = subchat_single(
         ccx.clone(),
-        model_name,
+        model_id,
         messages,
         Some(tools_subset.clone()),
         Some("auto".to_string()),
@@ -455,7 +441,7 @@ pub async fn subchat(
             if !tool_calls.is_empty() {
                 _ = subchat_single(
                     ccx.clone(),
-                    model_name,
+                    model_id,
                     messages.clone(),
                     Some(vec![]),
                     Some("none".to_string()),
diff --git a/refact-agent/engine/src/telemetry/basic_transmit.rs b/refact-agent/engine/src/telemetry/basic_transmit.rs
index ea3ba1e9a..d46a91643 100644
--- a/refact-agent/engine/src/telemetry/basic_transmit.rs
+++ b/refact-agent/engine/src/telemetry/basic_transmit.rs
@@ -1,5 +1,5 @@
 use tracing::{error, info};
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;
 use std::path::PathBuf;
 use serde_json::json;
 
@@ -107,7 +107,7 @@ pub async fn basic_telemetry_compress(
 
 pub async fn basic_telemetry_send(
     global_context: Arc<ARwLock<GlobalContext>>,
-    caps: Arc<RwLock<CodeAssistantCaps>>,
+    caps: Arc<CodeAssistantCaps>,
 ) -> () {
     let (cache_dir, api_key, enable_basic_telemetry) = {
         let cx = global_context.write().await;
@@ -119,13 +119,11 @@ pub async fn basic_telemetry_send(
     };
     let (dir_compressed, dir_sent) = telemetry_storage_dirs(&cache_dir).await;
 
-    let telemetry_basic_dest = caps.read().unwrap().telemetry_basic_dest.clone();
-
-    if enable_basic_telemetry && !telemetry_basic_dest.is_empty() {
+    if enable_basic_telemetry && !caps.telemetry_basic_dest.is_empty() {
         send_telemetry_files_to_mothership(
             dir_compressed.clone(),
             dir_sent.clone(),
-            telemetry_basic_dest.clone(),
+            caps.telemetry_basic_dest.clone(),
             api_key,
             global_context.clone()
         ).await;
@@ -133,7 +131,7 @@ pub async fn basic_telemetry_send(
         if !enable_basic_telemetry {
             info!("basic telemetry sending not enabled, skip");
         }
-        if telemetry_basic_dest.is_empty() {
+        if caps.telemetry_basic_dest.is_empty() {
             info!("basic telemetry dest is empty, skip");
         }
     }
diff --git a/refact-agent/engine/src/tokens.rs b/refact-agent/engine/src/tokens.rs
new file mode 100644
index 000000000..0a7b7f438
--- /dev/null
+++ b/refact-agent/engine/src/tokens.rs
@@ -0,0 +1,226 @@
+use tokio::io::AsyncWriteExt;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+use std::time::Duration;
+use tokio::sync::RwLock as ARwLock;
+use tokio::sync::Mutex as AMutex;
+use tokenizers::Tokenizer;
+use reqwest::header::AUTHORIZATION;
+use reqwest::Response;
+use uuid::Uuid;
+
+use crate::custom_error::MapErrToString;
+use crate::files_correction::canonical_path;
+use crate::global_context::GlobalContext;
+use crate::caps::{default_hf_tokenizer_template, strip_model_from_finetune, BaseModelRecord};
+
+
+async fn try_open_tokenizer(
+    res: Response,
+    to: impl AsRef<Path>,
+) -> Result<(), String> {
+    let mut file = tokio::fs::OpenOptions::new()
+        .write(true)
+        .create(true)
+        .open(&to)
+        .await
+        .map_err(|e| format!("failed to open file: {}", e))?;
+    file.write_all(&res.bytes().await
+        .map_err(|e| format!("failed to fetch bytes: {}", e))?
+    ).await.map_err(|e| format!("failed to write to file: {}", e))?;
+    file.flush().await.map_err(|e| format!("failed to flush file: {}", e))?;
+    tracing::info!("saved tokenizer to {}", to.as_ref().display());
+    Ok(())
+}
+
+async fn download_tokenizer_file(
+    http_client: &reqwest::Client,
+    http_path: &str,
+    tokenizer_api_token: &str,
+    to: &Path,
+) -> Result<(), String> {
+    tokio::fs::create_dir_all(
+        to.parent().ok_or_else(|| "tokenizer path has no parent")?,
+    ).await.map_err(|e| format!("failed to create parent dir: {}", e))?;
+    if to.exists() {
+        return Ok(());
+    }
+
+    tracing::info!("downloading tokenizer from {}", http_path);
+    let mut req = http_client.get(http_path);
+    
+    if !tokenizer_api_token.is_empty() {
+        req = req.header(AUTHORIZATION, format!("Bearer {tokenizer_api_token}"))
+    }
+    
+    let res = req
+        .send()
+        .await
+        .map_err(|e| format!("failed to get response: {}", e))?
+        .error_for_status()
+        .map_err(|e| format!("failed to get response: {}", e))?;
+    try_open_tokenizer(res, to).await?;
+    Ok(())
+}
+
+fn check_json_file(path: &Path) -> bool {
+    Tokenizer::from_file(path).is_ok()
+}
+
+async fn try_download_tokenizer_file_and_open(
+    http_client: &reqwest::Client,
+    http_path: &str,
+    tokenizer_api_token: &str,
+    path: &Path,
+) -> Result<(), String> {
+    if path.exists() && check_json_file(path) {
+        return Ok(());
+    }
+
+    let tmp_file = std::env::temp_dir().join(Uuid::new_v4().to_string());
+    let tmp_path = tmp_file.as_path();
+    
+    // Track the last error message
+    let mut last_error = String::from("");
+    for i in 0..15 {
+        if i != 0 {
+            tokio::time::sleep(Duration::from_millis(200)).await;
+        }
+        let res = download_tokenizer_file(http_client, http_path, tokenizer_api_token, tmp_path).await;
+        if let Err(err_msg) = res {
+            last_error = format!("failed to download tokenizer: {}", err_msg);
+            tracing::error!("{last_error}");
+            continue;
+        }
+
+        let parent = path.parent();
+        if parent.is_none() {
+            last_error = String::from("failed to download tokenizer: parent is not set");
+            tracing::error!("{last_error}");
+            continue;
+        }
+
+        let res = tokio::fs::create_dir_all(parent.unwrap()).await;
+        if let Err(err_msg) = res {
+            last_error = format!("failed to create parent dir: {}", err_msg);
+            tracing::error!("{last_error}");
+            continue;
+        }
+
+        if !check_json_file(tmp_path) {
+            last_error = String::from("failed to download tokenizer: file is not a tokenizer");
+            tracing::error!("{last_error}");
+            continue;
+        }
+
+        match tokio::fs::copy(tmp_path, path).await {
+            Ok(_) => {
+                tracing::info!("moved tokenizer to {}", path.display());
+                return Ok(());
+            },
+            Err(e) => { 
+                last_error = format!("failed to copy tokenizer file: {}", e);
+                tracing::error!("{last_error}");
+                continue; 
+            }
+        }
+    }
+    Err(last_error)
+}
+
+pub async fn cached_tokenizer(
+    global_context: Arc<ARwLock<GlobalContext>>,
+    model_rec: &BaseModelRecord,
+) -> Result<Option<Arc<Tokenizer>>, String> {
+    let model_id = strip_model_from_finetune(&model_rec.id);
+    let tokenizer_download_lock: Arc<AMutex<bool>> = global_context.read().await.tokenizer_download_lock.clone();
+    let _tokenizer_download_locked = tokenizer_download_lock.lock().await;
+
+    let (client2, cache_dir, tokenizer_in_gcx, hf_tokenizer_template) = {
+        let cx_locked = global_context.read().await;
+        let template = cx_locked.caps.clone().map(|caps| caps.hf_tokenizer_template.clone())
+            .unwrap_or_else(default_hf_tokenizer_template);
+        (cx_locked.http_client.clone(), cx_locked.cache_dir.clone(), cx_locked.tokenizer_map.get(&model_id).cloned(), template)
+    };
+
+    if let Some(tokenizer) = tokenizer_in_gcx {
+        return Ok(tokenizer)
+    }
+
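+    // A model's tokenizer can be declared as: "fake" (no real tokenizer, token counts
+    // are estimated), "hf://<model>" (resolved through the caps HF template URL),
+    // a plain http(s) URL, or a local path / file:// URL; an empty value is an error.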
+    let (mut tok_file_path, tok_url) = match &model_rec.tokenizer {
+        empty_tok if empty_tok.is_empty() => return Err(format!("failed to load tokenizer: empty tokenizer for {model_id}")),
+        fake_tok if fake_tok.starts_with("fake") => return Ok(None),
+        hf_tok if hf_tok.starts_with("hf://") => {
+            let hf_model = hf_tok.strip_prefix("hf://").unwrap();
+            let url = hf_tokenizer_template.replace("$HF_MODEL", hf_model);
+            (PathBuf::new(), url)
+        }
+        http_tok if http_tok.starts_with("http://") || http_tok.starts_with("https://") => {
+            (PathBuf::new(), http_tok.to_string())
+        }
+        file_tok => {
+            let file = if file_tok.starts_with("file://") {
+                url::Url::parse(file_tok)
+                    .and_then(|url| url.to_file_path().map_err(|_| url::ParseError::EmptyHost))
+                    .map_err_with_prefix(format!("Invalid path URL {file_tok}:"))?
+            } else {
+                canonical_path(file_tok)
+            };
+            (canonical_path(file.to_string_lossy()), "".to_string())
+        }
+    };
+
+    if tok_file_path.as_os_str().is_empty() {
+        let tokenizer_cache_dir = std::path::PathBuf::from(cache_dir).join("tokenizers");
+        let sanitized_model_id = model_id.chars()
+            .map(|c| if c.is_alphanumeric() { c } else { '_' })
+            .collect::<String>();
+        
+        tok_file_path = tokenizer_cache_dir.join(&sanitized_model_id).join("tokenizer.json");
+
+        try_download_tokenizer_file_and_open(&client2, &tok_url, &model_rec.tokenizer_api_key, &tok_file_path).await?;
+    }
+    
+    tracing::info!("loading tokenizer \"{}\"", tok_file_path.display());
+    let mut tokenizer = Tokenizer::from_file(tok_file_path)
+        .map_err(|e| format!("failed to load tokenizer: {}", e))?;
+    let _ = tokenizer.with_truncation(None);
+    tokenizer.with_padding(None);
+    let arc = Some(Arc::new(tokenizer));
+
+    global_context.write().await.tokenizer_map.insert(model_id, arc.clone());
+    Ok(arc)
+}
+
+/// Estimate as length / 3.5: roughly 3 characters per token is reasonable for code, 4 for natural language
+fn estimate_tokens(text: &str) -> usize { 1 + text.len() * 2 / 7 }
+
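+/// Counts tokens with the model tokenizer when one is loaded; with None (e.g. a
+/// "fake" tokenizer) it falls back to estimate_tokens(), so 70 chars give 1 + 70*2/7 = 21.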
+pub fn count_text_tokens(
+    tokenizer: Option>,
+    text: &str,
+) -> Result {
+    match tokenizer {
+        Some(tokenizer) => {
+            match tokenizer.encode_fast(text, false) {
+                Ok(tokens) => Ok(tokens.len()),
+                Err(e) => Err(format!("Encoding error: {e}")),
+            }
+        }
+        None => {
+            Ok(estimate_tokens(text))
+        }
+    }
+}
+
+pub fn count_text_tokens_with_fallback(
+    tokenizer: Option>,
+    text: &str,
+) -> usize {
+    count_text_tokens(tokenizer, text).unwrap_or_else(|e| {
+        tracing::error!("{e}");
+        estimate_tokens(text)
+    })
+}
\ No newline at end of file
diff --git a/refact-agent/engine/src/tools/tool_create_memory_bank.rs b/refact-agent/engine/src/tools/tool_create_memory_bank.rs
index dad5a9713..46db232b9 100644
--- a/refact-agent/engine/src/tools/tool_create_memory_bank.rs
+++ b/refact-agent/engine/src/tools/tool_create_memory_bank.rs
@@ -14,7 +14,6 @@ use crate::{
         at_commands::AtCommandsContext,
         at_tree::{construct_tree_out_of_flat_list_of_paths, PathsHolderNodeArc},
     },
-    cached_tokenizers,
     call_validation::{ChatContent, ChatMessage, ChatUsage, ContextEnum, ContextFile, PostprocessSettings},
     files_correction::{get_project_dirs, paths_from_anywhere},
     files_in_workspace::{get_file_text_from_memory_or_disk, ls_files},
@@ -23,7 +22,7 @@ use crate::{
     subchat::subchat,
     tools::tools_description::Tool,
 };
-use crate::call_validation::ReasoningEffort;
+use crate::caps::resolve_chat_model;
 use crate::global_context::try_load_caps_quickly_if_not_present;
 
 const MAX_EXPLORATION_STEPS: usize = 1000;
@@ -263,7 +262,8 @@ async fn read_and_compress_directory(
     }
 
     let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await.map_err(|x| x.message)?;
-    let tokenizer = cached_tokenizers::cached_tokenizer(caps, gcx.clone(), model).await.map_err(|e| format!("Tokenizer error: {}", e))?;
+    let model_rec = resolve_chat_model(caps, &model)?;
+    let tokenizer = crate::tokens::cached_tokenizer(gcx.clone(), &model_rec.base).await?;
     let mut pp_settings = PostprocessSettings::new();
     pp_settings.max_files_n = context_files.len();
     let compressed = postprocess_context_files(
@@ -427,7 +427,7 @@ impl Tool for ToolCreateMemoryBank {
                     MB_EXPERT_WRAP_UP,
                     1,
                     None,
-                    Some(ReasoningEffort::High),
+                    None,
                     Some(tool_call_id.clone()),
                     Some(format!("{log_prefix}-memory-bank-dir-{}", target.target_name.replace("/", "_"))),
                     Some(false),
diff --git a/refact-agent/engine/src/tools/tool_deep_analysis.rs b/refact-agent/engine/src/tools/tool_deep_analysis.rs
index 70fe0f0a4..c833269a6 100644
--- a/refact-agent/engine/src/tools/tool_deep_analysis.rs
+++ b/refact-agent/engine/src/tools/tool_deep_analysis.rs
@@ -5,15 +5,15 @@ use serde_json::Value;
 use tokio::sync::Mutex as AMutex;
 use async_trait::async_trait;
 use axum::http::StatusCode;
+use crate::caps::resolve_chat_model;
 use crate::subchat::subchat_single;
+use crate::tokens::count_text_tokens_with_fallback;
 use crate::tools::tools_description::Tool;
 use crate::call_validation::{ChatMessage, ChatContent, ChatUsage, ContextEnum, SubchatParameters, ContextFile, PostprocessSettings};
 use crate::at_commands::at_commands::AtCommandsContext;
-use crate::cached_tokenizers;
 use crate::custom_error::ScratchError;
 use crate::global_context::try_load_caps_quickly_if_not_present;
 use crate::postprocessing::pp_context_files::postprocess_context_files;
-use crate::scratchpads::scratchpad_utils::count_tokens;
 
 pub struct ToolDeepAnalysis;
 
@@ -29,12 +29,13 @@ async fn _make_prompt(
 ) -> Result<String, String> {
     let gcx = ccx.lock().await.global_context.clone();
     let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await.map_err(|x| x.message)?;
-    let tokenizer = cached_tokenizers::cached_tokenizer(caps, gcx.clone(), subchat_params.subchat_model.to_string()).await
-        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error loading tokenizer: {}", e))).map_err(|x| x.message)?;
+    let model_rec = resolve_chat_model(caps, &subchat_params.subchat_model)?;
+    let tokenizer = crate::tokens::cached_tokenizer(gcx.clone(), &model_rec.base).await
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e)).map_err(|x| x.message)?;
     let tokens_extra_budget = (subchat_params.subchat_n_ctx as f32 * TOKENS_EXTRA_BUDGET_PERCENT) as usize;
     let mut tokens_budget: i64 = (subchat_params.subchat_n_ctx - subchat_params.subchat_max_new_tokens - subchat_params.subchat_tokens_for_rag - tokens_extra_budget) as i64;
     let final_message = format!("***Problem:***\n{problem_statement}\n\n***Problem context:***\n");
-    tokens_budget -= count_tokens(&tokenizer.read().unwrap(), &final_message) as i64;
+    tokens_budget -= count_text_tokens_with_fallback(tokenizer.clone(), &final_message) as i64;
     let mut context = "".to_string(); 
    let mut context_files: Vec<ContextFile> = vec![];
     for message in previous_messages.iter().rev() {
@@ -62,7 +63,7 @@ async fn _make_prompt(
                 continue;
             }
         };
-        let left_tokens = tokens_budget - count_tokens(&tokenizer.read().unwrap(), &message_row) as i64;
+        let left_tokens = tokens_budget - count_text_tokens_with_fallback(tokenizer.clone(), &message_row) as i64;
         if left_tokens < 0 {
             // we do not end here, maybe there are smaller useful messages at the beginning
             continue;
@@ -180,7 +181,7 @@ impl Tool for ToolDeepAnalysis {
     }
 
     fn tool_depends_on(&self) -> Vec<String> {
-        vec![]
+        vec!["thinking".to_string()]
     }
 }
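For tracing the budget math in `_make_prompt` above: with the `deep_analysis` defaults from `customization_compiled_in.yaml` later in this diff, the numbers work out as below. `TOKENS_EXTRA_BUDGET_PERCENT` is not visible in this hunk, so the 5% figure is only an assumed value for illustration:

```rust
// Worked example of tokens_budget in _make_prompt(), self-contained.
const TOKENS_EXTRA_BUDGET_PERCENT: f32 = 0.05; // assumption; the real constant is outside this hunk

fn main() {
    // deep_analysis defaults from customization_compiled_in.yaml below
    let (n_ctx, max_new_tokens, tokens_for_rag) = (128_000usize, 32_000usize, 70_000usize);
    let tokens_extra_budget = (n_ctx as f32 * TOKENS_EXTRA_BUDGET_PERCENT) as usize; // 6_400
    let tokens_budget = (n_ctx - max_new_tokens - tokens_for_rag - tokens_extra_budget) as i64;
    assert_eq!(tokens_budget, 19_600); // what remains for the problem statement and prior messages
}
```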
 
diff --git a/refact-agent/engine/src/tools/tools_description.rs b/refact-agent/engine/src/tools/tools_description.rs
index 51888e098..8af93006d 100644
--- a/refact-agent/engine/src/tools/tools_description.rs
+++ b/refact-agent/engine/src/tools/tools_description.rs
@@ -9,6 +9,7 @@ use tokio::sync::Mutex as AMutex;
 
 use crate::at_commands::at_commands::AtCommandsContext;
 use crate::call_validation::{ChatUsage, ContextEnum};
+use crate::global_context::try_load_caps_quickly_if_not_present;
 use crate::global_context::GlobalContext;
 use crate::integrations::integr_abstract::IntegrationConfirmation;
 use crate::tools::tools_execute::{command_should_be_confirmed_by_user, command_should_be_denied};
@@ -159,6 +160,11 @@ pub async fn tools_merged_and_filtered(
     ).await;
     tools_all.extend(integrations);
 
+    let is_there_a_thinking_model = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await {
+        Ok(caps) => caps.chat_models.get(&caps.defaults.chat_thinking_model).is_some(),
+        Err(_) => false,
+    };
+
     let mut filtered_tools = IndexMap::new();
     for (tool_name, tool) in tools_all {
         let dependencies = tool.tool_depends_on();
@@ -168,6 +174,9 @@ pub async fn tools_merged_and_filtered(
         if dependencies.contains(&"vecdb".to_string()) && !vecdb_on {
             continue;
         }
+        if dependencies.contains(&"thinking".to_string()) && !is_there_a_thinking_model {
+            continue;
+        }
         filtered_tools.insert(tool_name, tool);
     }
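The `"thinking"` entry added here behaves like the existing `vecdb` gate (and the `ast` gate assumed from the unshown context lines): a tool is dropped from the merged list when a pseudo-dependency it declares is unsatisfied. A hypothetical condensation of the loop above:

```rust
// Hypothetical distillation of the filtering in tools_merged_and_filtered():
// a tool survives only if every pseudo-dependency it declares is satisfied.
fn keep_tool(dependencies: &[String], ast_on: bool, vecdb_on: bool, thinking_on: bool) -> bool {
    dependencies.iter().all(|dep| match dep.as_str() {
        "ast" => ast_on,
        "vecdb" => vecdb_on,
        // new in this diff: satisfied only when caps.defaults.chat_thinking_model resolves
        "thinking" => thinking_on,
        _ => true,
    })
}
```

Tools opt in by returning the dependency from `tool_depends_on()`, as `deep_analysis` now does in the previous file.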
 
@@ -532,8 +541,8 @@ fn default_param_type() -> String {
 /// TODO: Think of a better way to know if we can send array type to the model
 /// 
 /// For now, anthropic models support it, gpt models don't, for others we'll need to test
-pub fn model_supports_array_param_type(model_name: &str) -> bool {
-    model_name.starts_with("claude")
+pub fn model_supports_array_param_type(model_id: &str) -> bool {
+    model_id.contains("claude")
 }
 
 pub fn make_openai_tool_value(
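The switch from `starts_with` to `contains` matters because model ids are now provider-qualified, so "claude" is no longer guaranteed to sit at the front of the string. A quick self-contained check, with ids taken from elsewhere in this diff:

```rust
fn model_supports_array_param_type(model_id: &str) -> bool {
    model_id.contains("claude")
}

fn main() {
    // provider-prefixed ids as they appear in the new fixtures and provider configs
    assert!(model_supports_array_param_type("Refact/claude-3-7-sonnet"));
    assert!(model_supports_array_param_type("anthropic/claude-3.7-sonnet"));
    assert!(!model_supports_array_param_type("Refact/gpt-4o"));
    // starts_with("claude") would have rejected both claude ids above
}
```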
@@ -584,7 +593,7 @@ impl ToolDesc {
         if !model_supports_array_param_type(model) {
             for param in &self.parameters {
                 if param.param_type == "array" {
-                    tracing::error!("Tool {} has array parameter, but model {} does not support it", self.name, model);
+                    tracing::warn!("Tool {} has array parameter, but model {} does not support it", self.name, model);
                     return false;
                 }
             }
diff --git a/refact-agent/engine/src/tools/tools_execute.rs b/refact-agent/engine/src/tools/tools_execute.rs
index 08d5d2853..08d34d1b7 100644
--- a/refact-agent/engine/src/tools/tools_execute.rs
+++ b/refact-agent/engine/src/tools/tools_execute.rs
@@ -1,5 +1,5 @@
 use std::collections::HashMap;
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;
 use glob::Pattern;
 use indexmap::IndexMap;
 use tokio::sync::Mutex as AMutex;
@@ -9,7 +9,9 @@ use tracing::{info, warn};
 
 use crate::at_commands::at_commands::AtCommandsContext;
 use crate::at_commands::execute_at::MIN_RAG_CONTEXT_LIMIT;
-use crate::call_validation::{ChatMessage, ChatContent, ContextEnum, ContextFile, SubchatParameters};
+use crate::call_validation::{ChatContent, ChatMessage, ChatModelType, ContextEnum, ContextFile, SubchatParameters};
+use crate::custom_error::MapErrToString;
+use crate::global_context::try_load_caps_quickly_if_not_present;
 use crate::http::http_post_json;
 use crate::integrations::docker::docker_container_manager::docker_container_get_host_lsp_port_to_connect;
 use crate::postprocessing::pp_context_files::postprocess_context_files;
@@ -17,7 +19,7 @@ use crate::postprocessing::pp_plain_text::postprocess_plain_text;
 use crate::scratchpads::scratchpad_utils::{HasRagResults, max_tokens_for_rag_chat_by_tools};
 use crate::tools::tools_description::{MatchConfirmDenyResult, Tool};
 use crate::yaml_configs::customization_loader::load_customization;
-use crate::caps::get_model_record;
+use crate::caps::{is_cloud_model, resolve_chat_model, resolve_model};
 use crate::http::routers::v1::at_tools::{ToolExecuteResponse, ToolsExecutePost};
 
 
@@ -35,12 +37,7 @@ pub async fn unwrap_subchat_params(ccx: Arc<AMutex<AtCommandsContext>>, tool_nam
             let mut error_log = Vec::new();
             let tconfig = load_customization(gcx.clone(), true, &mut error_log).await;
             for e in error_log.iter() {
-                tracing::error!(
-                    "{}:{} {:?}",
-                    crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-                    e.error_line,
-                    e.error_msg,
-                );
+                tracing::error!("{e}");
             }
             tconfig.subchat_tool_parameters.get(tool_name).cloned()
                 .ok_or_else(|| format!("subchat params for tool {} not found (checked in Post and in Customization)", tool_name))?
@@ -48,20 +45,47 @@ pub async fn unwrap_subchat_params(ccx: Arc>, tool_nam
     };
 
     // check if the models exist otherwise use the external chat model
-    match get_model_record(gcx, &params.subchat_model).await {
-        Ok(_) => {}
-        Err(err) => {
-            let current_model = ccx.lock().await.current_model.clone();
-            warn!("subchat_model {} is not available: {}. Using {} model as a fallback", params.subchat_model, err, current_model);
-            params.subchat_model = current_model;
+    let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await.map_err_to_string()?;
+
+    if !params.subchat_model.is_empty() {
+        match resolve_chat_model(caps.clone(), &params.subchat_model) {
+            Ok(_) => return Ok(params),
+            Err(e) => {
+                tracing::warn!("Specified subchat_model {} is not available: {}", params.subchat_model, e);
+            }
         }
     }
+
+    let current_model = ccx.lock().await.current_model.clone();
+    let model_to_resolve = match params.subchat_model_type {
+        ChatModelType::Light => &caps.defaults.chat_light_model,
+        ChatModelType::Default => &caps.defaults.chat_default_model,
+        ChatModelType::Thinking => &caps.defaults.chat_thinking_model,
+    };
+
+    params.subchat_model = match resolve_model(&caps.chat_models, model_to_resolve) {
+        Ok(model_rec) => {
+            if !is_cloud_model(&current_model) && is_cloud_model(&model_rec.base.id)
+                && params.subchat_model_type != ChatModelType::Light {
+                current_model.to_string()
+            } else {
+                model_rec.base.id.clone()
+            }
+        },
+        Err(e) => {
+            tracing::warn!("{:?} model is not available: {}. Using {} model as a fallback.", 
+                params.subchat_model_type, e, current_model);
+            current_model
+        }
+    };
+
+    tracing::info!("using model for subchat: {}", params.subchat_model);
     Ok(params)
 }
 
 pub async fn run_tools_remotely(
     ccx: Arc<AMutex<AtCommandsContext>>,
-    model_name: &str,
+    model_id: &str,
     maxgen: usize,
     original_messages: &[ChatMessage],
     stream_back_to_user: &mut HasRagResults,
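To summarize the new ladder in `unwrap_subchat_params` above: an explicit `subchat_model` wins if it resolves; otherwise the caps default for the requested `subchat_model_type` is used, except that a non-cloud current model is not silently swapped for a cloud default (with `Light` as the exception); if even the default fails to resolve, the chat's current model is kept. A hypothetical, self-contained distillation of that decision:

```rust
#[derive(PartialEq)]
enum ChatModelType { Light, Default, Thinking } // mirrors call_validation::ChatModelType

// Hypothetical condensation: the `is_cloud` booleans stand in for is_cloud_model(),
// and resolution failures are modeled as None.
fn pick_subchat_model(
    explicit: Option<String>,             // params.subchat_model, if it resolved
    type_default: Option<(String, bool)>, // caps default for the type, plus its is_cloud flag
    current: (String, bool),              // ccx.current_model, plus its is_cloud flag
    model_type: ChatModelType,
) -> String {
    if let Some(model) = explicit {
        return model;
    }
    match type_default {
        Some((model, cloud)) if current.1 || !cloud || model_type == ChatModelType::Light => model,
        _ => current.0, // default unavailable, or it would upgrade a local chat to the cloud
    }
}
```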
@@ -87,7 +111,7 @@ pub async fn run_tools_remotely(
         maxgen,
         subchat_tool_parameters,
         postprocess_parameters,
-        model_name: model_name.to_string(),
+        model_name: model_id.to_string(),
         chat_id,
         style: style.clone(),
     };
@@ -109,7 +133,7 @@ pub async fn run_tools_remotely(
 pub async fn run_tools_locally(
     ccx: Arc<AMutex<AtCommandsContext>>,
     tools: &mut IndexMap<String, Box<dyn Tool + Send>>,
-    tokenizer: Arc<RwLock<Tokenizer>>,
+    tokenizer: Option<Arc<Tokenizer>>,
     maxgen: usize,
     original_messages: &Vec<ChatMessage>,
     stream_back_to_user: &mut HasRagResults,
@@ -131,7 +155,7 @@ pub async fn run_tools_locally(
 pub async fn run_tools(
     ccx: Arc<AMutex<AtCommandsContext>>,
     tools: &mut IndexMap<String, Box<dyn Tool + Send>>,
-    tokenizer: Arc<RwLock<Tokenizer>>,
+    tokenizer: Option<Arc<Tokenizer>>,
     maxgen: usize,
     original_messages: &Vec<ChatMessage>,
     style: &Option<String>,
@@ -276,7 +300,7 @@ async fn pp_run_tools(
     generated_other: Vec<ChatMessage>,
     context_files_for_pp: &mut Vec<ContextFile>,
     tokens_for_rag: usize,
-    tokenizer: Arc<RwLock<Tokenizer>>,
+    tokenizer: Option<Arc<Tokenizer>>,
     style: &Option<String>,
 ) -> (Vec<ChatMessage>, Vec<ChatMessage>) {
     let mut generated_tool = generated_tool.to_vec();
diff --git a/refact-agent/engine/src/vecdb/vdb_file_splitter.rs b/refact-agent/engine/src/vecdb/vdb_file_splitter.rs
index 8a46b771c..a62154ec9 100644
--- a/refact-agent/engine/src/vecdb/vdb_file_splitter.rs
+++ b/refact-agent/engine/src/vecdb/vdb_file_splitter.rs
@@ -1,14 +1,13 @@
 use std::sync::Arc;
-use std::sync::RwLock as StdRwLock;
 
 use tokenizers::Tokenizer;
 use tokio::sync::RwLock as ARwLock;
 
 use crate::ast::chunk_utils::get_chunks;
-use crate::ast::count_tokens;
 use crate::ast::file_splitter::LINES_OVERLAP;
 use crate::files_in_workspace::Document;
 use crate::global_context::GlobalContext;
+use crate::tokens::count_text_tokens_with_fallback;
 use crate::vecdb::vdb_structs::SplitResult;
 
 pub struct FileSplitter {
@@ -24,7 +23,7 @@ impl FileSplitter {
     }
 
     pub async fn vectorization_split(&self, doc: &Document,
-                                     tokenizer: Option<Arc<StdRwLock<Tokenizer>>>,
+                                     tokenizer: Option<Arc<Tokenizer>>,
                                      tokens_limit: usize,
                                      global_context: Arc<ARwLock<GlobalContext>>
     ) -> Result<Vec<SplitResult>, String> {
@@ -41,7 +40,7 @@ impl FileSplitter {
         let mut top_row: i32 = -1;
         let lines = text.split('\n').collect::<Vec<_>>();
         for (line_idx, line) in lines.iter().enumerate() {
-            let text_orig_tok_n = count_tokens(tokenizer.clone(), line);
+            let text_orig_tok_n = count_text_tokens_with_fallback(tokenizer.clone(), line);
             if top_row == -1 && text_orig_tok_n != 0 { // top lines are empty
                 top_row = line_idx as i32;
             }
diff --git a/refact-agent/engine/src/vecdb/vdb_highlev.rs b/refact-agent/engine/src/vecdb/vdb_highlev.rs
index c57c79b6a..bbb11b4b0 100644
--- a/refact-agent/engine/src/vecdb/vdb_highlev.rs
+++ b/refact-agent/engine/src/vecdb/vdb_highlev.rs
@@ -7,7 +7,6 @@ use async_trait::async_trait;
 use tracing::{error, info};
 
 use crate::background_tasks::BackgroundTasksHolder;
-use crate::caps::get_custom_embedding_api_key;
 use crate::fetch_embedding;
 use crate::global_context::{CommandLine, GlobalContext};
 use crate::knowledge::{MemdbSubEvent, MemoriesDatabase};
@@ -17,15 +16,6 @@ use crate::vecdb::vdb_structs::{MemoRecord, MemoSearchResult, SearchResult, VecD
 use crate::vecdb::vdb_thread::{vecdb_start_background_tasks, vectorizer_enqueue_dirty_memory, vectorizer_enqueue_files, FileVectorizerService};
 
 
-fn model_to_rejection_threshold(embedding_model: &str) -> f32 {
-    match embedding_model {
-        "text-embedding-3-small" => 0.63,
-        "thenlper_gte" => 0.25,
-        _ => 0.63,
-    }
-}
-
-
 pub struct VecDb {
     pub memdb: Arc<AMutex<MemoriesDatabase>>,
     vecdb_emb_client: Arc<AMutex<reqwest::Client>>,
@@ -49,24 +39,10 @@ async fn do_i_need_to_reload_vecdb(
 
     let vecdb_max_files = gcx.read().await.cmdline.vecdb_max_files;
     let mut consts = {
-        let caps_locked = caps.read().unwrap();
-        let mut b = caps_locked.embedding_batch;
-        if b == 0 {
-            b = 64;
-        }
-        if b > 256 {
-            tracing::warn!("embedding_batch can't be higher than 256");
-            b = 64;
-        }
         VecdbConstants {
-            embedding_model: caps_locked.embedding_model.clone(),
-            embedding_size: caps_locked.embedding_size,
-            embedding_batch: b,
-            vectorizer_n_ctx: caps_locked.embedding_n_ctx,
+            embedding_model: caps.embedding_model.clone(),
             tokenizer: None,
-            endpoint_embeddings_template: caps_locked.endpoint_embeddings_template.clone(),
-            endpoint_embeddings_style: caps_locked.endpoint_embeddings_style.clone(),
-            splitter_window_size: caps_locked.embedding_n_ctx / 2,
+            splitter_window_size: caps.embedding_model.base.n_ctx / 2,
             vecdb_max_files: vecdb_max_files,
         }
     };
@@ -77,30 +53,29 @@ async fn do_i_need_to_reload_vecdb(
         Some(ref db) => {
             if
                 db.constants.embedding_model == consts.embedding_model &&
-                db.constants.endpoint_embeddings_template == consts.endpoint_embeddings_template &&
-                db.constants.endpoint_embeddings_style == consts.endpoint_embeddings_style &&
-                db.constants.splitter_window_size == consts.splitter_window_size &&
-                db.constants.embedding_batch == consts.embedding_batch &&
-                db.constants.embedding_size == consts.embedding_size
+                db.constants.splitter_window_size == consts.splitter_window_size
             {
                 return (false, None);
             }
         }
     }
 
-    if consts.embedding_model.is_empty() || consts.endpoint_embeddings_template.is_empty() {
-        error!("command line says to launch vecdb, but this will not happen: embedding_model.is_empty() || endpoint_embeddings_template.is_empty()");
+    if consts.embedding_model.base.name.is_empty() || consts.embedding_model.base.endpoint.is_empty() {
+        error!("command line says to launch vecdb, but this will not happen: embedding model name or endpoint is empty");
         return (true, None);
     }
 
-    let tokenizer_maybe = crate::cached_tokenizers::cached_tokenizer(
-        caps.clone(), gcx.clone(), consts.embedding_model.clone()).await;
-    if tokenizer_maybe.is_err() {
-        error!("vecdb launch failed, embedding model tokenizer didn't load: {}", tokenizer_maybe.unwrap_err());
-        return (false, None);
-    }
-    consts.tokenizer = Some(tokenizer_maybe.clone().unwrap());
-
+    let tokenizer_result = crate::tokens::cached_tokenizer(
+        gcx.clone(), &consts.embedding_model.base,
+    ).await;
+    
+    consts.tokenizer = match tokenizer_result {
+        Ok(tokenizer) => tokenizer,
+        Err(err) => {
+            error!("vecdb launch failed, embedding model tokenizer didn't load: {}", err);
+            return (false, None);
+        }
+    };
     return (true, Some(consts));
 }
 
@@ -166,17 +141,15 @@ impl VecDb {
         config_dir: &PathBuf,
         cmdline: CommandLine,
         constants: VecdbConstants,
-        api_key: &String
     ) -> Result<VecDb, String> {
         let emb_table_name = crate::vecdb::vdb_emb_aux::create_emb_table_name(&vec![cmdline.workspace_folder]);
-        let handler = VecDBSqlite::init(cache_dir, &constants.embedding_model, constants.embedding_size, &emb_table_name).await?;
+        let handler = VecDBSqlite::init(cache_dir, &constants.embedding_model.base.name, constants.embedding_model.embedding_size, &emb_table_name).await?;
         let vecdb_handler = Arc::new(AMutex::new(handler));
         let memdb = Arc::new(AMutex::new(MemoriesDatabase::init(config_dir, &constants, &emb_table_name, cmdline.reset_memory).await?));
 
         let vectorizer_service = Arc::new(AMutex::new(FileVectorizerService::new(
             vecdb_handler.clone(),
             constants.clone(),
-            api_key.clone(),
             memdb.clone(),
         ).await));
 
@@ -423,18 +396,10 @@ pub async fn memories_search(
         )
     };
 
-    let api_key = get_custom_embedding_api_key(gcx.clone()).await;
-    if let Err(err) = api_key {
-        return Err(err.message);
-    }
-
-    let embedding = fetch_embedding::get_embedding_with_retry(
+    let embedding = fetch_embedding::get_embedding_with_retries(
         vecdb_emb_client,
-        &constants.endpoint_embeddings_style,
         &constants.embedding_model,
-        &constants.endpoint_embeddings_template,
         vec![query.clone()],
-        &api_key.unwrap(),
         5,
     ).await?;
     if embedding.is_empty() {
@@ -452,10 +417,9 @@ pub async fn memories_search(
         score_a.partial_cmp(&score_b).unwrap_or(std::cmp::Ordering::Equal)
     });
 
-    let rejection_threshold = model_to_rejection_threshold(constants.embedding_model.as_str());
     let mut filtered_results = Vec::new();
     for rec in results.iter() {
-        if rec.distance.abs() >= rejection_threshold {
+        if rec.distance.abs() >= constants.embedding_model.rejection_threshold {
             info!("distance {:.3} -> dropped memory {}", rec.distance, rec.memid);
         } else {
             info!("distance {:.3} -> kept memory {}", rec.distance, rec.memid);
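The per-model `rejection_threshold` used here replaces the hardcoded name-to-threshold map deleted at the top of this file. A self-contained sketch of the filtering predicate, reusing the old 0.63 default as the example value:

```rust
// Stand-in for the relevant slice of caps::EmbeddingModelRecord.
struct EmbeddingModelLite { rejection_threshold: f32 }

// Same predicate as memories_search() and vecdb_search() below: a hit is kept
// only while its distance stays strictly below the model's threshold.
fn keep_hit(distance: f32, model: &EmbeddingModelLite) -> bool {
    distance.abs() < model.rejection_threshold
}

fn main() {
    let model = EmbeddingModelLite { rejection_threshold: 0.63 }; // old default, e.g. text-embedding-3-small
    assert!(keep_hit(0.25, &model));
    assert!(!keep_hit(0.70, &model));
}
```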
@@ -512,17 +476,13 @@ impl VecdbSearch for VecDb {
         query: String,
         top_n: usize,
         vecdb_scope_filter_mb: Option<String>,
-        api_key: &String,
     ) -> Result {
         // TODO: move out of struct, replace self with Arc
         let t0 = std::time::Instant::now();
-        let embedding_mb = fetch_embedding::get_embedding_with_retry(
+        let embedding_mb = fetch_embedding::get_embedding_with_retries(
             self.vecdb_emb_client.clone(),
-            &self.constants.endpoint_embeddings_style,
             &self.constants.embedding_model,
-            &self.constants.endpoint_embeddings_template,
             vec![query.clone()],
-            api_key,
             5,
         ).await;
         if embedding_mb.is_err() {
@@ -542,7 +502,7 @@ impl VecdbSearch for VecDb {
         info!("search itself {:.3}s", t1.elapsed().as_secs_f64());
         let mut dist0 = 0.0;
         let mut filtered_results = Vec::new();
-        let rejection_threshold = model_to_rejection_threshold(self.constants.embedding_model.as_str());
+        let rejection_threshold = self.constants.embedding_model.rejection_threshold;
         info!("rejection_threshold {:.3}", rejection_threshold);
         for rec in results.iter_mut() {
             if dist0 == 0.0 {
diff --git a/refact-agent/engine/src/vecdb/vdb_init.rs b/refact-agent/engine/src/vecdb/vdb_init.rs
index 4d304bde5..454a0b0fa 100644
--- a/refact-agent/engine/src/vecdb/vdb_init.rs
+++ b/refact-agent/engine/src/vecdb/vdb_init.rs
@@ -5,7 +5,6 @@ use tokio::sync::Mutex as AMutex;
 use tokio::time::sleep;
 use tracing::{debug, error, info, warn};
 
-use crate::caps::get_custom_embedding_api_key;
 use crate::global_context::{CommandLine, GlobalContext};
 use crate::vecdb::vdb_highlev::VecDb;
 use crate::vecdb::vdb_structs::{VecdbConstants, VecdbSearch};
@@ -34,7 +33,6 @@ impl Default for VecDbInitConfig {
 
 #[derive(Debug)]
 pub enum VecDbInitError {
-    ApiKeyError(String),
     InitializationError(String),
     TestSearchError(String),
 }
@@ -42,7 +40,6 @@ pub enum VecDbInitError {
 impl std::fmt::Display for VecDbInitError {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            VecDbInitError::ApiKeyError(msg) => write!(f, "API key error: {}", msg),
             VecDbInitError::InitializationError(msg) => write!(f, "Initialization error: {}", msg),
             VecDbInitError::TestSearchError(msg) => write!(f, "Test search error: {}", msg),
         }
@@ -54,7 +51,6 @@ pub async fn init_vecdb_fail_safe(
     config_dir: &PathBuf,
     cmdline: CommandLine,
     constants: VecdbConstants,
-    api_key: &String,
     init_config: VecDbInitConfig,
) -> Result<VecDb, VecDbInitError> {
     let mut attempt: usize = 0;
@@ -64,12 +60,12 @@ pub async fn init_vecdb_fail_safe(
         attempt += 1;
         info!("VecDb init attempt {}/{}", attempt, init_config.max_attempts);
         
-        match VecDb::init(cache_dir, config_dir, cmdline.clone(), constants.clone(), api_key).await {
+        match VecDb::init(cache_dir, config_dir, cmdline.clone(), constants.clone()).await {
             Ok(vecdb) => {
                 info!("Successfully initialized VecDb on attempt {}", attempt);
                 
                 if init_config.test_search_after_init {
-                    match vecdb_test_search(&vecdb, api_key).await {
+                    match vecdb_test_search(&vecdb).await {
                         Ok(_) => {
                             info!("VecDb test search successful");
                             return Ok(vecdb);
@@ -104,12 +100,12 @@ pub async fn init_vecdb_fail_safe(
     }
 }
 
-async fn vecdb_test_search(vecdb: &VecDb, api_key: &String) -> Result<(), String> {
+async fn vecdb_test_search(vecdb: &VecDb) -> Result<(), String> {
     let test_query = "test query".to_string();
     let top_n = 3;
     let filter = None;
     
-    match VecdbSearch::vecdb_search(vecdb, test_query, top_n, filter, api_key).await {
+    match VecdbSearch::vecdb_search(vecdb, test_query, top_n, filter).await {
         Ok(_) => Ok(()),
         Err(e) => Err(format!("Test search failed: {}", e)),
     }
@@ -120,10 +116,6 @@ pub async fn initialize_vecdb_with_context(
     constants: VecdbConstants,
     init_config: Option<VecDbInitConfig>,
 ) -> Result<(), VecDbInitError> {
-    let api_key = match get_custom_embedding_api_key(gcx.clone()).await {
-        Ok(key) => key,
-        Err(err) => return Err(VecDbInitError::ApiKeyError(err.message)),
-    };
     
     let (cache_dir, config_dir, cmdline) = {
         let gcx_locked = gcx.read().await;
@@ -141,7 +133,6 @@ pub async fn initialize_vecdb_with_context(
         &base_dir_config,
         cmdline.clone(),
         constants,
-        &api_key,
         config,
     ).await?;
     
diff --git a/refact-agent/engine/src/vecdb/vdb_remote.rs b/refact-agent/engine/src/vecdb/vdb_remote.rs
index dc2260fce..930eb828e 100644
--- a/refact-agent/engine/src/vecdb/vdb_remote.rs
+++ b/refact-agent/engine/src/vecdb/vdb_remote.rs
@@ -17,7 +17,6 @@ impl VecdbSearch for VecDbRemote {
         query: String,
         top_n: usize,
         _vecdb_scope_filter_mb: Option<String>,
-        _api_key: &String,
     ) -> Result<SearchResult, String> {
         // NOTE: if you're going to use https make sure that you set insecure flag from cmdline
         let url = "http://127.0.0.1:8008/v1/vdb-search".to_string();
diff --git a/refact-agent/engine/src/vecdb/vdb_structs.rs b/refact-agent/engine/src/vecdb/vdb_structs.rs
index de29dd37e..6a06a0cde 100644
--- a/refact-agent/engine/src/vecdb/vdb_structs.rs
+++ b/refact-agent/engine/src/vecdb/vdb_structs.rs
@@ -1,12 +1,13 @@
 use std::fmt::Debug;
 use std::path::PathBuf;
-use std::sync::RwLock as StdRwLock;
 use std::sync::Arc;
 use serde::{Deserialize, Serialize};
 use indexmap::IndexMap;
 use tokenizers::Tokenizer;
 use async_trait::async_trait;
 
+use crate::caps::EmbeddingModelRecord;
+
 
 #[async_trait]
 pub trait VecdbSearch: Send {
@@ -15,20 +16,14 @@ pub trait VecdbSearch: Send {
         query: String,
         top_n: usize,
         filter_mb: Option<String>,
-        api_key: &String,
     ) -> Result<SearchResult, String>;
 }
 
 #[derive(Debug, Clone)]
 pub struct VecdbConstants {
     // constant in a sense it cannot be changed without creating a new db
-    pub embedding_model: String,
-    pub embedding_size: i32,
-    pub embedding_batch: usize,
-    pub tokenizer: Option<Arc<StdRwLock<Tokenizer>>>,
-    pub vectorizer_n_ctx: usize,
-    pub endpoint_embeddings_template: String,
-    pub endpoint_embeddings_style: String,
+    pub embedding_model: EmbeddingModelRecord,
+    pub tokenizer: Option<Arc<Tokenizer>>,
     pub splitter_window_size: usize,
     pub vecdb_max_files: usize,
 }
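With `EmbeddingModelRecord` embedded in `VecdbConstants`, the reload check in `vdb_highlev.rs` collapses five field comparisons into one record equality plus the derived splitter window (`n_ctx / 2` per this diff). A self-contained sketch, assuming the record derives `PartialEq` (which the `==` in `do_i_need_to_reload_vecdb` requires):

```rust
#[derive(PartialEq)]
struct EmbeddingModelLite { id: String, n_ctx: usize } // stand-in for EmbeddingModelRecord

struct ConstantsLite {
    embedding_model: EmbeddingModelLite,
    splitter_window_size: usize, // embedding_model.base.n_ctx / 2
}

// The vector db is rebuilt only when the model record itself or the derived
// splitter window changes; batch size, endpoint style and embedding size now
// travel inside the record instead of being compared field by field.
fn needs_reload(old: &ConstantsLite, new: &ConstantsLite) -> bool {
    old.embedding_model != new.embedding_model
        || old.splitter_window_size != new.splitter_window_size
}
```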
diff --git a/refact-agent/engine/src/vecdb/vdb_thread.rs b/refact-agent/engine/src/vecdb/vdb_thread.rs
index b41d47e0b..9fb5492f1 100644
--- a/refact-agent/engine/src/vecdb/vdb_thread.rs
+++ b/refact-agent/engine/src/vecdb/vdb_thread.rs
@@ -11,7 +11,7 @@ use tokio::task::JoinHandle;
 use tracing::{info, warn};
 
 use crate::ast::file_splitter::AstBasedFileSplitter;
-use crate::fetch_embedding::get_embedding_with_retry;
+use crate::fetch_embedding::get_embedding_with_retries;
 use crate::files_in_workspace::{is_path_to_enqueue_valid, Document};
 use crate::global_context::GlobalContext;
 use crate::knowledge::{vectorize_dirty_memories, MemoriesDatabase};
@@ -33,7 +33,6 @@ pub struct FileVectorizerService {
     pub vstatus: Arc<AMutex<VecDbStatus>>,
     pub vstatus_notify: Arc<ANotify>,   // fun stuff https://docs.rs/tokio/latest/tokio/sync/struct.Notify.html
     constants: VecdbConstants,
-    api_key: String,
     memdb: Arc<AMutex<MemoriesDatabase>>,
     vecdb_todo: Arc>>,
 }
@@ -44,21 +43,17 @@ async fn vectorize_batch_from_q(
     vstatus: Arc<AMutex<VecDbStatus>>,
     client: Arc<AMutex<reqwest::Client>>,
     constants: &VecdbConstants,
-    api_key: &String,
     vecdb_handler_arc: Arc<AMutex<VecDBSqlite>>,
-    #[allow(non_snake_case)]
-    B: usize,
 ) -> Result<(), String> {
+    #[allow(non_snake_case)]
+    let B = constants.embedding_model.embedding_batch;
     let batch = run_actual_model_on_these.drain(..B.min(run_actual_model_on_these.len())).collect::<Vec<_>>();
     assert!(batch.len() > 0);
 
-    let batch_result = match get_embedding_with_retry(
+    let batch_result = match get_embedding_with_retries(
         client.clone(),
-        &constants.endpoint_embeddings_style.clone(),
-        &constants.embedding_model.clone(),
-        &constants.endpoint_embeddings_template.clone(),
+        &constants.embedding_model,
         batch.iter().map(|x| x.window_text.clone()).collect(),
-        api_key,
         10,
     ).await {
         Ok(res) => res,
@@ -170,7 +165,6 @@ async fn vectorize_thread(
         vecdb_handler_arc,
         vstatus,
         vstatus_notify,
-        api_key
     ) = {
         let vservice_locked = vservice.lock().await;
         (
@@ -180,7 +174,6 @@ async fn vectorize_thread(
             vservice_locked.vecdb_handler.clone(),
             vservice_locked.vstatus.clone(),
             vservice_locked.vstatus_notify.clone(),
-            vservice_locked.api_key.clone()
         )
     };
 
@@ -238,7 +231,7 @@ async fn vectorize_thread(
         loop {
             if
             run_actual_model_on_these.len() > 0 && flush ||
-                run_actual_model_on_these.len() >= constants.embedding_batch
+                run_actual_model_on_these.len() >= constants.embedding_model.embedding_batch
             {
                 if let Err(err) = vectorize_batch_from_q(
                     &mut run_actual_model_on_these,
@@ -246,9 +239,7 @@ async fn vectorize_thread(
                     vstatus.clone(),
                     client.clone(),
                     &constants,
-                    &api_key,
                     vecdb_handler_arc.clone(),
-                    constants.embedding_batch,
                 ).await {
                     tracing::error!("{}", err);
                     continue;
@@ -281,8 +272,7 @@ async fn vectorize_thread(
                         vecdb_handler_arc.clone(),
                         vstatus.clone(),
                         client.clone(),
-                        &api_key,
-                        constants.embedding_batch,
+                        constants.embedding_model.embedding_batch,
                     ).await;
                     info!("/MEMDB {:?}", r);
                     continue;
@@ -353,7 +343,7 @@ async fn vectorize_thread(
         }
 
         let file_splitter = AstBasedFileSplitter::new(constants.splitter_window_size);
-        let mut splits = file_splitter.vectorization_split(&doc, None, gcx.clone(), constants.vectorizer_n_ctx).await.unwrap_or_else(|err| {
+        let mut splits = file_splitter.vectorization_split(&doc, None, gcx.clone(), constants.embedding_model.base.n_ctx).await.unwrap_or_else(|err| {
             info!("{}", err);
             vec![]
         });
@@ -424,7 +414,6 @@ impl FileVectorizerService {
     pub async fn new(
         vecdb_handler: Arc<AMutex<VecDBSqlite>>,
         constants: VecdbConstants,
-        api_key: String,
         memdb: Arc<AMutex<MemoriesDatabase>>,
     ) -> Self {
         let vstatus = Arc::new(AMutex::new(
@@ -446,7 +435,6 @@ impl FileVectorizerService {
             vstatus: vstatus.clone(),
             vstatus_notify: Arc::new(ANotify::new()),
             constants,
-            api_key,
             memdb,
             vecdb_todo: Default::default(),
         }
diff --git a/refact-agent/engine/src/yaml_configs/create_configs.rs b/refact-agent/engine/src/yaml_configs/create_configs.rs
index ac09b9853..38e13e75e 100644
--- a/refact-agent/engine/src/yaml_configs/create_configs.rs
+++ b/refact-agent/engine/src/yaml_configs/create_configs.rs
@@ -17,14 +17,18 @@ pub async fn yaml_configs_try_create_all(gcx: Arc<ARwLock<GlobalContext>>) -> St
     let mut results = Vec::new();
     let config_dir = gcx.read().await.config_dir.clone();
 
-    let integrations_d = config_dir.join("integrations.d");
-    if let Err(e) = tokio::fs::create_dir_all(&integrations_d).await {
-        tracing::warn!("Failed to create directory {:?}: {}", integrations_d, e);
-        results.push(format!("Error creating directory {:?}: {}", integrations_d, e));
+    let dirs_to_create = [
+        config_dir.join("integrations.d"),
+        config_dir.join("providers.d"),
+    ];
+    for dir in dirs_to_create {
+        if let Err(e) = tokio::fs::create_dir_all(&dir).await {
+            tracing::warn!("Failed to create directory {:?}: {}", dir, e);
+            results.push(format!("Error creating directory {:?}: {}", dir, e));
+        }
     }
 
     let files = vec![
-        ("bring-your-own-key.yaml", crate::caps::BRING_YOUR_OWN_KEY_SAMPLE),
         ("customization.yaml", include_str!("default_customization.yaml")),
         ("privacy.yaml", include_str!("default_privacy.yaml")),
         ("indexing.yaml", include_str!("default_indexing.yaml")),
@@ -33,7 +37,7 @@ pub async fn yaml_configs_try_create_all(gcx: Arc<ARwLock<GlobalContext>>) -> St
 
     for (file_name, content) in files {
         let file_path = if file_name == "integrations.d/shell.yaml" {
-            integrations_d.join("shell.yaml")
+            config_dir.join("integrations.d").join("shell.yaml")
         } else {
             config_dir.join(file_name)
         };
diff --git a/refact-agent/engine/src/yaml_configs/customization_compiled_in.yaml b/refact-agent/engine/src/yaml_configs/customization_compiled_in.yaml
index 63186b55a..91a573b3f 100644
--- a/refact-agent/engine/src/yaml_configs/customization_compiled_in.yaml
+++ b/refact-agent/engine/src/yaml_configs/customization_compiled_in.yaml
@@ -75,7 +75,7 @@ PROMPT_AGENTIC_TOOLS: |
       - You can also use `locate()` with the task description to find all necessary files automatically.
       - Use `cat("filename")` to look inside the most important files without compression.
     - Check any files that might indirectly relate to the task.
-    - Running available validation tools preliminary - is a good idea. 
+    - Running available validation tools beforehand is a good idea.
   **Step 3: Make a Clear Plan**
     - Goal: Create a clear, validated plan before making changes.
     - After gathering context (Step 2), create your plan independently.
@@ -111,7 +111,7 @@ PROMPT_AGENTIC_TOOLS: |
   %WORKSPACE_INFO%
 
   %PROJECT_SUMMARY%
-  
+
   **Always test your solutions!**
   **Clearly comment before each action.**
   **Document results carefully.**
@@ -235,23 +235,23 @@ system_prompts:
 
 subchat_tool_parameters:
   locate:
-    subchat_model: "gpt-4o-mini"
+    subchat_model_type: "light"
     subchat_tokens_for_rag: 30000
     subchat_n_ctx: 32000
     subchat_max_new_tokens: 8000
   locate_search:
-    subchat_model: "gpt-4o-mini"
+    subchat_model_type: "light"
     subchat_tokens_for_rag: 10000
     subchat_n_ctx: 16000
     subchat_max_new_tokens: 2000
   deep_analysis:
-    subchat_model: "o3-mini"
+    subchat_model_type: "thinking"
     subchat_tokens_for_rag: 70000
     subchat_n_ctx: 128000
     subchat_max_new_tokens: 32000
     subchat_reasoning_effort: "high"
   create_memory_bank:
-    subchat_model: "o3-mini"
+    subchat_model_type: "default"
     subchat_tokens_for_rag: 88000
     subchat_n_ctx: 128000
     subchat_max_new_tokens: 32000
diff --git a/refact-agent/engine/src/yaml_configs/customization_loader.rs b/refact-agent/engine/src/yaml_configs/customization_loader.rs
index 644c4c49a..48c2efbb4 100644
--- a/refact-agent/engine/src/yaml_configs/customization_loader.rs
+++ b/refact-agent/engine/src/yaml_configs/customization_loader.rs
@@ -7,7 +7,7 @@ use tokio::sync::RwLock as ARwLock;
 
 use crate::call_validation::{ChatMessage, SubchatParameters};
 use crate::global_context::{GlobalContext, try_load_caps_quickly_if_not_present};
-use crate::integrations::setting_up_integrations::YamlError;
+use crate::custom_error::YamlError;
 
 
 #[derive(Debug, Serialize, Deserialize, Default)]
@@ -131,7 +131,7 @@ pub fn load_and_mix_with_users_config(
     let user_unstructured: serde_yaml::Value = serde_yaml::from_str(user_yaml)
         .map_err(|e| {
             error_log.push(YamlError {
-                integr_config_path: "customization.yaml".to_string(),
+                path: "customization.yaml".to_string(),
                 error_line: 0,
                 error_msg: e.to_string(),
             });
@@ -146,7 +146,7 @@ pub fn load_and_mix_with_users_config(
     let mut user_config: CustomizationYaml = serde_yaml::from_str(user_yaml)
         .map_err(|e| {
             error_log.push(YamlError {
-                integr_config_path: "customization.yaml".to_string(),
+                path: "customization.yaml".to_string(),
                 error_line: 0,
                 error_msg: e.to_string(),
             });
@@ -155,7 +155,7 @@ pub fn load_and_mix_with_users_config(
     let caps_config: CustomizationYaml = serde_yaml::from_str(caps_yaml)
         .map_err(|e| {
             error_log.push(YamlError {
-                integr_config_path: "caps.yaml".to_string(),
+                path: "caps.yaml".to_string(),
                 error_line: 0,
                 error_msg: e.to_string(),
             });
@@ -207,8 +207,9 @@ pub async fn load_customization(
     let caps = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await {
         Ok(caps) => caps,
         Err(e) => {
+            let address_url = gcx.read().await.cmdline.address_url.clone();
             error_log.push(YamlError {
-                integr_config_path: "bring-your-own-key.yaml".to_string(),
+                path: address_url,
                 error_line: 0,
                 error_msg: format!("error loading caps: {e}"),
             });
@@ -216,11 +217,6 @@ pub async fn load_customization(
         }
     };
 
-    let caps_config_text = {
-        let caps_locked = caps.read().unwrap();
-        caps_locked.customization.clone()
-    };
-
     let config_dir = gcx.read().await.config_dir.clone();
     let customization_yaml_path = config_dir.join("customization.yaml");
     let user_config_text = std::fs::read_to_string(&customization_yaml_path)
@@ -229,7 +225,7 @@ pub async fn load_customization(
 
     load_and_mix_with_users_config(
         &user_config_text,
-        &caps_config_text,
+        &caps.customization,
         skip_visibility_filtering,
         allow_experimental,
         error_log,
@@ -247,12 +243,7 @@ mod tests {
             "", "", true, true, &mut error_log,
         );
         for e in error_log.iter() {
-            eprintln!(
-                "{}:{} {:?}",
-                crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-                e.error_line,
-                e.error_msg,
-            );
+            eprintln!("{e}");
         }
         assert!(error_log.is_empty(), "There were errors in the error_log");
         assert_eq!(config.system_prompts.get("default").is_some(), true);
diff --git a/refact-agent/engine/src/yaml_configs/default_privacy.yaml b/refact-agent/engine/src/yaml_configs/default_privacy.yaml
index 404b0d927..9602b494f 100644
--- a/refact-agent/engine/src/yaml_configs/default_privacy.yaml
+++ b/refact-agent/engine/src/yaml_configs/default_privacy.yaml
@@ -13,7 +13,7 @@ privacy_rules:
     - "*/secret_project2/*.txt"
     - "*.pem"
 
-  only_send_to_servers_I_control:       # You can set up which ones you control in bring-your-own-key.yaml, otherwise you control none
+  only_send_to_servers_I_control:       # You can set up which providers you control in ~/.config/refact/providers.d/*.yaml, otherwise you control none
     - "secret_passwords.txt"
 
 
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/anthropic.yaml b/refact-agent/engine/src/yaml_configs/default_providers/anthropic.yaml
new file mode 100644
index 000000000..83702cddc
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/anthropic.yaml
@@ -0,0 +1,27 @@
+chat_endpoint: https://api.anthropic.com/v1/chat/completions
+supports_completion: false
+
+api_key: sk-ant-...
+
+chat_models:
+  claude-3-7-sonnet-latest:
+    n_ctx: 200000
+    supports_tools: true
+    supports_multimodality: true
+    supports_clicks: true
+    supports_agent: true
+    supports_reasoning: anthropic
+    tokenizer: hf://Xenova/claude-tokenizer
+
+running_models:
+  - claude-3-7-sonnet-latest
+  - claude-3-5-sonnet-latest
+  - claude-3-5-haiku-latest
+
+model_default_settings_ui:
+  chat:
+    n_ctx: 200000
+    supports_tools: true
+    supports_multimodality: true
+    supports_agent: true
+    tokenizer: hf://Xenova/claude-tokenizer
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/custom.yaml b/refact-agent/engine/src/yaml_configs/default_providers/custom.yaml
new file mode 100644
index 000000000..50e489680
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/custom.yaml
@@ -0,0 +1,7 @@
+supports_completion: true
+
+model_default_settings_ui:
+  chat:
+    tokenizer: fake
+  completion:
+    tokenizer: fake
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/deepseek.yaml b/refact-agent/engine/src/yaml_configs/default_providers/deepseek.yaml
new file mode 100644
index 000000000..144699d03
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/deepseek.yaml
@@ -0,0 +1,15 @@
+chat_endpoint: https://api.deepseek.com/v1/chat/completions
+supports_completion: false
+
+api_key: sk-...
+
+running_models:
+  - deepseek-chat
+  - deepseek-reasoner
+
+model_default_settings_ui:
+  chat:
+    n_ctx: 64000
+    supports_tools: true
+    supports_agent: true
+    tokenizer: hf://deepseek-ai/DeepSeek-...
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/google_gemini.yaml b/refact-agent/engine/src/yaml_configs/default_providers/google_gemini.yaml
new file mode 100644
index 000000000..b422e2780
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/google_gemini.yaml
@@ -0,0 +1,19 @@
+chat_endpoint:       https://generativelanguage.googleapis.com/v1beta/openai/chat/completions
+embedding_endpoint:  https://generativelanguage.googleapis.com/v1beta/openai/embeddings
+supports_completion: false
+
+api_key: AI...
+
+running_models:
+  - gemini-2.5-pro-exp-03-25
+  - gemini-2.5-pro-preview-03-25
+  - models/gemini-2.0-flash
+  - models/gemini-2.0-flash-lite
+
+model_default_settings_ui:
+  chat:
+    n_ctx: 200000
+    supports_tools: true
+    supports_multimodality: true
+    supports_agent: true
+    tokenizer: hf://Xenova/gemma2-tokenizer
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/groq.yaml b/refact-agent/engine/src/yaml_configs/default_providers/groq.yaml
new file mode 100644
index 000000000..2c6702da2
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/groq.yaml
@@ -0,0 +1,14 @@
+chat_endpoint: https://api.groq.com/openai/v1/chat/completions
+supports_completion: false
+
+api_key: gsk_...
+
+running_models:
+  - qwen-qwq-32b
+
+model_default_settings_ui:
+  chat:
+    n_ctx: 131072
+    supports_tools: true
+    supports_agent: true
+    tokenizer: fake
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/lmstudio.yaml b/refact-agent/engine/src/yaml_configs/default_providers/lmstudio.yaml
new file mode 100644
index 000000000..ab704fe33
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/lmstudio.yaml
@@ -0,0 +1,11 @@
+completion_endpoint: http://localhost:1234/v1/completions
+chat_endpoint:       http://localhost:1234/v1/chat/completions
+supports_completion: true
+
+api_key: any-will-work
+
+model_default_settings_ui:
+  chat:
+    tokenizer: fake
+  completion:
+    tokenizer: fake
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/ollama.yaml b/refact-agent/engine/src/yaml_configs/default_providers/ollama.yaml
new file mode 100644
index 000000000..055bdb3d6
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/ollama.yaml
@@ -0,0 +1,32 @@
+completion_endpoint: "http://localhost:11434/v1/completions"
+chat_endpoint:       "http://localhost:11434/v1/chat/completions"
+supports_completion: true
+
+api_key: any-will-work
+
+completion_models:
+  qwen2.5-coder:1.5b-base:
+    n_ctx: 2048
+    model_family: qwen2.5-coder-base
+    tokenizer: hf://Qwen/Qwen2.5-Coder-1.5B
+    scratchpad: FIM-PSM
+    scratchpad_patch:
+      fim_prefix: <|fim_prefix|>
+      fim_suffix: <|fim_suffix|>
+      fim_middle: <|fim_middle|>
+      eot: <|endoftext|>
+      extra_stop_tokens:
+      - <|repo_name|>
+      - <|file_sep|>
+      - <|fim_pad|>
+      - <|cursor|>
+      context_format: qwen2.5
+      rag_ratio: 0.5
+
+model_default_settings_ui:
+  chat:
+    n_ctx: 2048
+    tokenizer: fake
+  completion:
+    n_ctx: 2048
+    tokenizer: fake
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/openai.yaml b/refact-agent/engine/src/yaml_configs/default_providers/openai.yaml
new file mode 100644
index 000000000..b4e6aebeb
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/openai.yaml
@@ -0,0 +1,24 @@
+chat_endpoint:       "https://api.openai.com/v1/chat/completions"
+embedding_endpoint:  "https://api.openai.com/v1/embeddings"
+supports_completion: false
+
+api_key: "sk-..."
+
+running_models:
+  - gpt-4.1
+  - gpt-4o
+  - gpt-4.1-mini
+  - gpt-4o-mini
+  - gpt-4.1-nano
+  - gpt-4.5-preview
+  - o3-mini
+  - o1-mini
+  - o1
+
+model_default_settings_ui:
+  chat:
+    n_ctx: 200000
+    supports_tools: true
+    supports_multimodality: true
+    supports_agent: true
+    tokenizer: hf://Xenova/gpt-4o
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/openrouter.yaml b/refact-agent/engine/src/yaml_configs/default_providers/openrouter.yaml
new file mode 100644
index 000000000..b2a1a4966
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/openrouter.yaml
@@ -0,0 +1,14 @@
+chat_endpoint:       https://openrouter.ai/api/v1/chat/completions
+embedding_endpoint:  https://openrouter.ai/api/v1/embeddings
+supports_completion: false
+
+api_key: "sk-or-..."
+
+running_models:
+  - anthropic/claude-3.7-sonnet
+  - openai/o3-mini
+  - openai/gpt-4.1
+
+model_default_settings_ui:
+  chat:
+    tokenizer: fake
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/xai.yaml b/refact-agent/engine/src/yaml_configs/default_providers/xai.yaml
new file mode 100644
index 000000000..5fa040aca
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/xai.yaml
@@ -0,0 +1,15 @@
+chat_endpoint: https://api.x.ai/v1/chat/completions
+supports_completion: false
+
+api_key: xai-...
+
+running_models:
+  - grok-3-beta
+  - grok-3-mini-beta
+
+model_default_settings_ui:
+  chat:
+    n_ctx: 131072
+    supports_tools: true
+    supports_agent: true
+    tokenizer: hf://Xenova/grok-1-tokenizer
\ No newline at end of file
diff --git a/refact-agent/gui/README.md b/refact-agent/gui/README.md
index eaa45f1d3..e2fd57912 100644
--- a/refact-agent/gui/README.md
+++ b/refact-agent/gui/README.md
@@ -339,8 +339,8 @@ type ChatUserMessageResponse = {
 };
 
 type CapsResponse = {
-  code_chat_default_model: string;
-  code_chat_models: Record<string, CodeChatModel>;
+  chat_default_model: string;
+  chat_models: Record<string, ChatModel>;
 };
 
 type CodeCompletionModel = {
diff --git a/refact-agent/gui/src/__fixtures__/caps.ts b/refact-agent/gui/src/__fixtures__/caps.ts
index 75a353265..24d706ec4 100644
--- a/refact-agent/gui/src/__fixtures__/caps.ts
+++ b/refact-agent/gui/src/__fixtures__/caps.ts
@@ -3,13 +3,7 @@ import { CapsResponse } from "../services/refact";
 export const STUB_CAPS_RESPONSE: CapsResponse = {
   cloud_name: "Refact",
   endpoint_style: "openai",
-
-  endpoint_template: "https://inference.smallcloud.ai/v1/completions",
-
-  endpoint_chat_passthrough:
-    "https://inference.smallcloud.ai/v1/chat/completions",
-  tokenizer_path_template:
-    "https://huggingface.co/$MODEL/resolve/main/tokenizer.json",
+  code_completion_n_ctx: 4000,
   tokenizer_rewrite_path: {
     "o1-mini": "Xenova/gpt-4o",
     "gpt-4-turbo-2024-04-09": "Xenova/gpt-4",
@@ -39,292 +33,424 @@ export const STUB_CAPS_RESPONSE: CapsResponse = {
     "starcoder2/3b": "bigcode/starcoder2-3b",
   },
   telemetry_basic_dest: "https://www.smallcloud.ai/v1/telemetry-basic",
-
-  code_completion_models: {
-    "Refact/1.6B": {
-      n_ctx: 4096,
-      supports_scratchpads: {
-        "FIM-SPM": {},
-      },
-      default_scratchpad: "FIM-SPM",
-      similar_models: ["Refact/1.6B", "Refact/1.6B/vllm"],
-      supports_tools: false,
-      supports_multimodality: false,
+  telemetry_basic_retrieve_my_own:
+    "https://staging.smallcloud.ai/v1/telemetry-retrieve-my-own-stats",
+  tokenizer_path_template:
+    "https://huggingface.co/$MODEL/resolve/main/tokenizer.json",
+  endpoint_chat_passthrough:
+    "https://inference.smallcloud.ai/v1/chat/completions",
+  endpoint_template: "https://inference.smallcloud.ai/v1/completions",
+  completion_models: {
+    "Refact/smallcloudai/Refact-1_6B-fim": {
+      n_ctx: 4000,
+      name: "smallcloudai/Refact-1_6B-fim",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/Refact/1.6B": {
+      n_ctx: 4000,
+      name: "Refact/1.6B",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/starcoder2/3b": {
+      n_ctx: 4000,
+      name: "starcoder2/3b",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/qwen2.5/coder/1.5b/base": {
+      n_ctx: 4000,
+      name: "qwen2.5/coder/1.5b/base",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/gpt-4o": {
+      n_ctx: 4000,
+      name: "gpt-4o",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/gpt-4o-mini": {
+      n_ctx: 4000,
+      name: "gpt-4o-mini",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/claude-3-5-sonnet": {
+      n_ctx: 4000,
+      name: "claude-3-5-sonnet",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/claude-3-5-haiku": {
+      n_ctx: 4000,
+      name: "claude-3-5-haiku",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/claude-3-7-sonnet": {
+      n_ctx: 4000,
+      name: "claude-3-7-sonnet",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/groq-llama-3.1-8b": {
+      n_ctx: 4000,
+      name: "groq-llama-3.1-8b",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/groq-llama-3.1-70b": {
+      n_ctx: 4000,
+      name: "groq-llama-3.1-70b",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/gemini-2.0-flash-exp": {
+      n_ctx: 4000,
+      name: "gemini-2.0-flash-exp",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/gemini-1.5-flash": {
+      n_ctx: 4000,
+      name: "gemini-1.5-flash",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/gemini-1.5-flash-8b": {
+      n_ctx: 4000,
+      name: "gemini-1.5-flash-8b",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/gemini-1.5-pro": {
+      n_ctx: 4000,
+      name: "gemini-1.5-pro",
+      enabled: true,
+      type: "completion",
+      model_family: null,
+    },
+    "Refact/gemini-2.0-exp-advanced": {
+      n_ctx: 4000,
+      name: "gemini-2.0-exp-advanced",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/grok-2": {
+      n_ctx: 4000,
+      name: "grok-2",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/deepseek-chat": {
+      n_ctx: 4000,
+      name: "deepseek-chat",
+      type: "completion",
+      enabled: true,
+      model_family: null,
+    },
+  },
+  chat_models: {
+    "Refact/gpt-4o": {
+      n_ctx: 128000,
+      name: "gpt-4o",
+      id: "Refact/gpt-4o",
+      type: "chat",
+      enabled: true,
+      tokenizer: "fake",
+      supports_tools: true,
+      supports_multimodality: true,
       supports_clicks: false,
+      supports_agent: true,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "groq-llama-3.1-8b": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        REPLACE_PASSTHROUGH: {
-          context_format: "chat",
-          rag_ratio: 0.5,
-        },
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "groq-llama-3.1-70b",
-        "groq-llama-3.2-1b",
-        "groq-llama-3.2-3b",
-        "groq-llama-3.2-11b-vision",
-        "groq-llama-3.2-90b-vision",
-      ],
+    "Refact/gpt-4o-mini": {
+      n_ctx: 128000,
+      name: "gpt-4o-mini",
+      id: "Refact/gpt-4o-mini",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
-      supports_multimodality: false,
+      type: "chat",
+      supports_multimodality: true,
       supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "qwen2.5/coder/1.5b/base": {
-      n_ctx: 4096,
-      supports_scratchpads: {
-        "FIM-PSM": {
-          fim_prefix: "<|fim_prefix|>",
-          fim_suffix: "<|fim_suffix|>",
-          fim_middle: "<|fim_middle|>",
-          eot: "<|endoftext|>",
-          extra_stop_tokens: ["<|repo_name|>", "<|file_sep|>", "<|fim_pad|>"],
-          context_format: "qwen2.5",
-          rag_ratio: 0.5,
-        },
-      },
-      default_scratchpad: "FIM-PSM",
-      similar_models: [
-        "qwen2.5/coder/1.5b/base",
-        "qwen2.5/coder/3b/base",
-        "qwen2.5/coder/7b/base",
-        "qwen2.5/coder/14b/base",
-        "qwen2.5/coder/32b/base",
-        "qwen2.5/coder/0.5b/base/vllm",
-        "qwen2.5/coder/1.5b/base/vllm",
-        "qwen2.5/coder/3b/base/vllm",
-        "qwen2.5/coder/7b/base/vllm",
-        "qwen2.5/coder/14b/base/vllm",
-        "qwen2.5/coder/32b/base/vllm",
-      ],
-      supports_tools: false,
-      supports_multimodality: false,
+    "Refact/o1": {
+      n_ctx: 200000,
+      name: "o1",
+      id: "Refact/o1",
+      enabled: true,
+      tokenizer: "fake",
+      supports_tools: true,
+      supports_multimodality: true,
+      type: "chat",
       supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: "openai",
+      supports_boost_reasoning: true,
+      default_temperature: null,
     },
-    "gpt-4o": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        REPLACE_PASSTHROUGH: {
-          context_format: "chat",
-          rag_ratio: 0.5,
-        },
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "gpt-4o-2024-05-13",
-        "gpt-4o-2024-08-06",
-        "openai/gpt-4o",
-      ],
+    "Refact/o1-mini": {
+      n_ctx: 128000,
+      name: "o1-mini",
+      id: "Refact/o1-mini",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: false,
       supports_clicks: false,
+      type: "chat",
+      supports_agent: false,
+      supports_reasoning: "openai",
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "gpt-4o-mini": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        REPLACE_PASSTHROUGH: {
-          context_format: "chat",
-          rag_ratio: 0.5,
-        },
-      },
-      default_scratchpad: "",
-      similar_models: ["gpt-4o-mini-2024-07-18"],
+    "Refact/o3-mini": {
+      n_ctx: 200000,
+      name: "o3-mini",
+      id: "Refact/o3-mini",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
+      type: "chat",
       supports_multimodality: false,
       supports_clicks: false,
+      supports_agent: true,
+      supports_reasoning: "openai",
+      supports_boost_reasoning: true,
+      default_temperature: null,
     },
-    "smallcloudai/Refact-1_6B-fim": {
-      n_ctx: 4096,
-      supports_scratchpads: {
-        "FIM-SPM": {},
-      },
-      default_scratchpad: "FIM-SPM",
-      similar_models: ["Refact/1.6B", "Refact/1.6B/vllm"],
-      supports_tools: false,
-      supports_multimodality: false,
+    "Refact/claude-3-5-sonnet": {
+      n_ctx: 200000,
+      name: "claude-3-5-sonnet",
+      id: "Refact/claude-3-5-sonnet",
+      enabled: true,
+      type: "chat",
+      tokenizer: "fake",
+      supports_tools: true,
+      supports_multimodality: true,
       supports_clicks: false,
+      supports_agent: true,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "groq-llama-3.1-70b": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        REPLACE_PASSTHROUGH: {
-          context_format: "chat",
-          rag_ratio: 0.5,
-        },
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "groq-llama-3.1-70b",
-        "groq-llama-3.2-1b",
-        "groq-llama-3.2-3b",
-        "groq-llama-3.2-11b-vision",
-        "groq-llama-3.2-90b-vision",
-      ],
+    "Refact/claude-3-5-haiku": {
+      type: "chat",
+      n_ctx: 200000,
+      name: "claude-3-5-haiku",
+      id: "Refact/claude-3-5-haiku",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: false,
       supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "starcoder2/3b": {
-      n_ctx: 4096,
-      supports_scratchpads: {
-        "FIM-PSM": {
-          context_format: "starcoder",
-          rag_ratio: 0.5,
-        },
-      },
-      default_scratchpad: "FIM-PSM",
-      similar_models: [
-        "bigcode/starcoderbase",
-        "starcoder/15b/base",
-        "starcoder/15b/plus",
-        "starcoder/1b/base",
-        "starcoder/3b/base",
-        "starcoder/7b/base",
-        "wizardcoder/15b",
-        "starcoder/1b/vllm",
-        "starcoder/3b/vllm",
-        "starcoder/7b/vllm",
-        "starcoder2/3b/base",
-        "starcoder2/7b/base",
-        "starcoder2/15b/base",
-        "starcoder2/3b/vllm",
-        "starcoder2/7b/vllm",
-        "starcoder2/15b/vllm",
-        "starcoder2/3b/neuron",
-        "starcoder2/7b/neuron",
-        "starcoder2/15b/neuron",
-        "starcoder2/3b",
-        "starcoder2/7b",
-        "starcoder2/15b",
-        "bigcode/starcoder2-3b",
-        "bigcode/starcoder2-7b",
-        "bigcode/starcoder2-15b",
-      ],
-      supports_tools: false,
+    "Refact/claude-3-7-sonnet": {
+      type: "chat",
+      n_ctx: 200000,
+      name: "claude-3-7-sonnet",
+      id: "Refact/claude-3-7-sonnet",
+      enabled: true,
+      tokenizer: "fake",
+      supports_tools: true,
+      supports_multimodality: true,
+      supports_clicks: true,
+      supports_agent: true,
+      supports_reasoning: "anthropic",
+      supports_boost_reasoning: true,
+      default_temperature: null,
+    },
+    "Refact/groq-llama-3.1-8b": {
+      type: "chat",
+      n_ctx: 128000,
+      name: "groq-llama-3.1-8b",
+      id: "Refact/groq-llama-3.1-8b",
+      enabled: true,
+      tokenizer: "fake",
+      supports_tools: true,
       supports_multimodality: false,
       supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-  },
-  code_completion_default_model: "qwen2.5/coder/1.5b/base",
-  code_completion_n_ctx: 4000,
-  code_chat_models: {
-    "groq-llama-3.1-70b": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        PASSTHROUGH: {},
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "groq-llama-3.1-70b",
-        "groq-llama-3.2-1b",
-        "groq-llama-3.2-3b",
-        "groq-llama-3.2-11b-vision",
-        "groq-llama-3.2-90b-vision",
-      ],
+    "Refact/groq-llama-3.1-70b": {
+      type: "chat",
+      n_ctx: 128000,
+      name: "groq-llama-3.1-70b",
+      id: "Refact/groq-llama-3.1-70b",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: false,
       supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "gpt-3.5-turbo": {
-      n_ctx: 16000,
-      supports_scratchpads: {
-        PASSTHROUGH: {},
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "gpt-3.5-turbo-1106",
-        "gpt-3.5-turbo-0125",
-        "gpt-4",
-        "gpt-4-turbo",
-        "gpt-4-turbo-2024-04-09",
-        "openai/gpt-3.5-turbo",
-        "openai/gpt-4",
-        "openai/gpt-4-turbo",
-      ],
+    "Refact/gemini-2.0-flash-exp": {
+      type: "chat",
+      n_ctx: 1000000,
+      name: "gemini-2.0-flash-exp",
+      id: "Refact/gemini-2.0-flash-exp",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
-      supports_multimodality: false,
+      supports_multimodality: true,
       supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "gpt-4o": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        PASSTHROUGH: {},
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "gpt-4o-2024-05-13",
-        "gpt-4o-2024-08-06",
-        "openai/gpt-4o",
-      ],
+    "Refact/gemini-1.5-flash": {
+      type: "chat",
+      n_ctx: 1000000,
+      name: "gemini-1.5-flash",
+      id: "Refact/gemini-1.5-flash",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: true,
       supports_clicks: false,
-      supports_agent: true,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
+    },
+    "Refact/gemini-1.5-flash-8b": {
+      type: "chat",
+      n_ctx: 1000000,
+      name: "gemini-1.5-flash-8b",
+      id: "Refact/gemini-1.5-flash-8b",
+      enabled: true,
+      tokenizer: "fake",
+      supports_tools: true,
+      supports_multimodality: true,
+      supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "gpt-4o-mini": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        PASSTHROUGH: {},
-      },
-      default_scratchpad: "",
-      similar_models: ["gpt-4o-mini-2024-07-18"],
+    "Refact/gemini-1.5-pro": {
+      type: "chat",
+      n_ctx: 2000000,
+      name: "gemini-1.5-pro",
+      id: "Refact/gemini-1.5-pro",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: true,
       supports_clicks: false,
+      supports_agent: true,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "claude-3-5-sonnet": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        PASSTHROUGH: {},
-      },
-      default_scratchpad: "",
-      similar_models: ["claude-3-5-sonnet-20240620"],
+    "Refact/gemini-2.0-exp-advanced": {
+      type: "chat",
+      n_ctx: 1000000,
+      name: "gemini-2.0-exp-advanced",
+      id: "Refact/gemini-2.0-exp-advanced",
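+        // An Error instance means the stream broke locally, so the error stays
+        // in this chat; a structured detail payload from the server is routed
+        // as a global error (see errorType below).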
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: true,
       supports_clicks: false,
       supports_agent: true,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "groq-llama-3.1-8b": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        PASSTHROUGH: {},
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "groq-llama-3.1-70b",
-        "groq-llama-3.2-1b",
-        "groq-llama-3.2-3b",
-        "groq-llama-3.2-11b-vision",
-        "groq-llama-3.2-90b-vision",
-      ],
+    "Refact/grok-2": {
+      type: "chat",
+      n_ctx: 128000,
+      name: "grok-2",
+      id: "Refact/grok-2",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: false,
       supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "gpt-4-turbo": {
-      n_ctx: 16000,
-      supports_scratchpads: {
-        PASSTHROUGH: {},
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "gpt-3.5-turbo-1106",
-        "gpt-3.5-turbo-0125",
-        "gpt-4",
-        "gpt-4-turbo",
-        "gpt-4-turbo-2024-04-09",
-        "openai/gpt-3.5-turbo",
-        "openai/gpt-4",
-        "openai/gpt-4-turbo",
-      ],
+    "Refact/deepseek-chat": {
+      type: "chat",
+      n_ctx: 64000,
+      name: "deepseek-chat",
+      id: "Refact/deepseek-chat",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: false,
       supports_clicks: false,
+      supports_agent: true,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
+    "Refact/deepseek-reasoner": {
+      type: "chat",
+      n_ctx: 64000,
+      name: "deepseek-reasoner",
+      id: "Refact/deepseek-reasoner",
+      enabled: true,
+      tokenizer: "fake",
+      supports_tools: false,
+      supports_multimodality: false,
+      supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: "deepseek",
+      supports_boost_reasoning: false,
+      default_temperature: 0.6,
+    },
+  },
+  embedding_model: {
+    type: "embedding",
+    n_ctx: 512,
+    enabled: true,
+    tokenizer: "fake",
+    embedding_size: 1536,
+    name: "thenlper/gte-base",
+    id: "Refact/thenlper/gte-base",
+    rejection_threshold: 0.25,
+    embedding_batch: 64,
   },
-  code_chat_default_model: "gpt-4o-mini",
   running_models: [
     "smallcloudai/Refact-1_6B-fim",
     "Refact/1.6B",
@@ -339,20 +465,25 @@ export const STUB_CAPS_RESPONSE: CapsResponse = {
     "groq-llama-3.1-8b",
     "groq-llama-3.1-70b",
   ],
+  completion_default_model: "Refact/1.6B",
+  chat_default_model: "gpt-4o",
+  chat_thinking_model: "",
+  chat_light_model: "",
+  caps_version: 0,
   code_chat_default_system_prompt: "default",
   support_metadata: true,
-  caps_version: 0,
+  customization: "",
 };
 
 export const EMPTY_CAPS_RESPONSE: CapsResponse = {
   support_metadata: false,
   caps_version: 0,
   cloud_name: "",
-  code_chat_default_model: "",
+  chat_default_model: "",
   code_chat_default_system_prompt: "",
-  code_chat_models: {},
-  code_completion_default_model: "",
-  code_completion_models: {},
+  chat_models: {},
+  completion_default_model: "",
+  completion_models: {},
   code_completion_n_ctx: 0,
   endpoint_chat_passthrough: "",
   endpoint_style: "",
@@ -360,5 +491,9 @@ export const EMPTY_CAPS_RESPONSE: CapsResponse = {
   running_models: [],
   telemetry_basic_dest: "",
   tokenizer_path_template: "",
+  customization: "",
   tokenizer_rewrite_path: {},
+  chat_light_model: "",
+  chat_thinking_model: "",
+  telemetry_basic_retrieve_my_own: "",
 };
diff --git a/refact-agent/gui/src/__fixtures__/context_files.ts b/refact-agent/gui/src/__fixtures__/context_files.ts
index 198353e45..67b5f68a7 100644
--- a/refact-agent/gui/src/__fixtures__/context_files.ts
+++ b/refact-agent/gui/src/__fixtures__/context_files.ts
@@ -5,8 +5,8 @@ const some_text = `import { CapsResponse } from "../services/refact";
 export const STUB_CAPS_RESPONSE: CapsResponse = {
   caps_version: 0,
   cloud_name: "Refact",
-  code_chat_default_model: "gpt-3.5-turbo",
-  code_chat_models: {
+  chat_default_model: "gpt-3.5-turbo",
+  chat_models: {
     "gpt-3.5-turbo": {
       default_scratchpad: "",
       n_ctx: 4096,
@@ -30,8 +30,8 @@ export const STUB_CAPS_RESPONSE: CapsResponse = {
       },
     },
   },
-  code_completion_default_model: "smallcloudai/Refact-1_6B-fim",
-  code_completion_models: {
+  completion_default_model: "smallcloudai/Refact-1_6B-fim",
+  completion_models: {
     "smallcloudai/Refact-1_6B-fim": {
       default_scratchpad: "FIM-SPM",
       n_ctx: 4096,
diff --git a/refact-agent/gui/src/app/middleware.ts b/refact-agent/gui/src/app/middleware.ts
index f00f7c3ef..2226c8727 100644
--- a/refact-agent/gui/src/app/middleware.ts
+++ b/refact-agent/gui/src/app/middleware.ts
@@ -14,6 +14,7 @@ import {
   upsertToolCall,
   sendCurrentChatToLspAfterToolCallUpdate,
   chatResponse,
+  chatError,
 } from "../features/Chat/Thread";
 import { statisticsApi } from "../services/refact/statistics";
 import { integrationsApi } from "../services/refact/integrations";
@@ -21,7 +22,11 @@ import { dockerApi } from "../services/refact/docker";
 import { capsApi, isCapsErrorResponse } from "../services/refact/caps";
 import { promptsApi } from "../services/refact/prompts";
 import { toolsApi } from "../services/refact/tools";
-import { commandsApi, isDetailMessage } from "../services/refact/commands";
+import {
+  commandsApi,
+  isDetailMessage,
+  isDetailMessageWithErrorType,
+} from "../services/refact/commands";
 import { pathApi } from "../services/refact/path";
 import { pingApi } from "../services/refact/ping";
 import {
@@ -44,7 +49,7 @@ import {
   ideForceReloadProjectTreeFiles,
 } from "../hooks/useEventBusForIDE";
 import { upsertToolCallIntoHistory } from "../features/History/historySlice";
-import { isToolResponse } from "../events";
+import { isToolResponse, modelsApi, providersApi } from "../services/refact";
 
 const AUTH_ERROR_MESSAGE =
   "There is an issue with your API key. Check out your API Key or re-login";
@@ -298,6 +303,40 @@ startListening({
     ) {
       listenerApi.dispatch(setError(action.payload));
     }
+
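+    // Provider configuration failures are surfaced globally; a 401 also sets
+    // the auth-error flag so the API-key problem is called out explicitly.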
+    if (
+      (providersApi.endpoints.updateProvider.matchRejected(action) ||
+        providersApi.endpoints.getProvider.matchRejected(action) ||
+        providersApi.endpoints.getProviderTemplates.matchRejected(action) ||
+        providersApi.endpoints.getConfiguredProviders.matchRejected(action)) &&
+      !action.meta.condition
+    ) {
+      const errorStatus = action.payload?.status;
+      const isAuthError = errorStatus === 401;
+      const message = isAuthError
+        ? AUTH_ERROR_MESSAGE
+        : isDetailMessage(action.payload?.data)
+          ? action.payload.data.detail
+          : `provider request error.`;
+
+      listenerApi.dispatch(setError(message));
+      listenerApi.dispatch(setIsAuthError(isAuthError));
+    }
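+    // getModels rejections get the same treatment as the provider endpoints.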
+    if (
+      modelsApi.endpoints.getModels.matchRejected(action) &&
+      !action.meta.condition
+    ) {
+      const errorStatus = action.payload?.status;
+      const isAuthError = errorStatus === 401;
+      const message = isAuthError
+        ? AUTH_ERROR_MESSAGE
+        : isDetailMessage(action.payload?.data)
+          ? action.payload.data.detail
+          : `models fetch error.`;
+
+      listenerApi.dispatch(setError(message));
+      listenerApi.dispatch(setIsAuthError(isAuthError));
+    }
   },
 });
 
@@ -375,8 +414,6 @@ startListening({
     pathApi.endpoints.customizationPath.matchRejected,
     pathApi.endpoints.privacyPath.matchFulfilled,
     pathApi.endpoints.privacyPath.matchRejected,
-    pathApi.endpoints.bringYourOwnKeyPath.matchFulfilled,
-    pathApi.endpoints.bringYourOwnKeyPath.matchRejected,
     pathApi.endpoints.integrationsPath.matchFulfilled,
     pathApi.endpoints.integrationsPath.matchRejected,
   ),
@@ -390,10 +427,13 @@ startListening({
           : state.chat.thread;
       const scope = `sendChat_${thread.model}_${mode}`;
 
-      const errorMessage = isDetailMessage(action.payload)
-        ? action.payload.detail
-        : null;
-      if (errorMessage) {
+      if (isDetailMessageWithErrorType(action.payload)) {
+        const errorMessage = action.payload.detail;
+        listenerApi.dispatch(
+          action.payload.errorType === "GLOBAL"
+            ? setError(errorMessage)
+            : chatError({ id: chatId, message: errorMessage }),
+        );
         const thunk = telemetryApi.endpoints.sendTelemetryChatEvent.initiate({
           scope,
           success: false,
@@ -446,7 +486,6 @@ startListening({
     if (
       pathApi.endpoints.customizationPath.matchFulfilled(action) ||
       pathApi.endpoints.privacyPath.matchFulfilled(action) ||
-      pathApi.endpoints.bringYourOwnKeyPath.matchFulfilled(action) ||
       pathApi.endpoints.integrationsPath.matchFulfilled(action)
     ) {
       const thunk = telemetryApi.endpoints.sendTelemetryNetEvent.initiate({
@@ -461,7 +500,6 @@ startListening({
     if (
       (pathApi.endpoints.customizationPath.matchRejected(action) ||
         pathApi.endpoints.privacyPath.matchRejected(action) ||
-        pathApi.endpoints.bringYourOwnKeyPath.matchRejected(action) ||
         pathApi.endpoints.integrationsPath.matchRejected(action)) &&
       !action.meta.condition
     ) {
diff --git a/refact-agent/gui/src/app/store.ts b/refact-agent/gui/src/app/store.ts
index 49b48ee80..a227989cc 100644
--- a/refact-agent/gui/src/app/store.ts
+++ b/refact-agent/gui/src/app/store.ts
@@ -22,6 +22,8 @@ import {
   dockerApi,
   telemetryApi,
   knowledgeApi,
+  providersApi,
+  modelsApi,
 } from "../services/refact";
 import { smallCloudApi } from "../services/smallcloud";
 import { reducer as fimReducer } from "../features/FIM/reducer";
@@ -100,6 +102,8 @@ const rootReducer = combineSlices(
     [checkpointsApi.reducerPath]: checkpointsApi.reducer,
     [telemetryApi.reducerPath]: telemetryApi.reducer,
     [knowledgeApi.reducerPath]: knowledgeApi.reducer,
+    [providersApi.reducerPath]: providersApi.reducer,
+    [modelsApi.reducerPath]: modelsApi.reducer,
   },
   historySlice,
   errorSlice,
@@ -186,6 +190,8 @@ export function setUpStore(preloadedState?: Partial) {
             checkpointsApi.middleware,
             telemetryApi.middleware,
             knowledgeApi.middleware,
+            providersApi.middleware,
+            modelsApi.middleware,
           )
           .prepend(historyMiddleware.middleware)
           // .prepend(errorMiddleware.middleware)
diff --git a/refact-agent/gui/src/components/Chat/Chat.tsx b/refact-agent/gui/src/components/Chat/Chat.tsx
index a28b815d7..b2bff2b49 100644
--- a/refact-agent/gui/src/components/Chat/Chat.tsx
+++ b/refact-agent/gui/src/components/Chat/Chat.tsx
@@ -135,8 +135,7 @@ export const Chat: React.FC = ({
               
                 
                   model:{" "}
-                  {capsForToolUse.currentModel ||
-                    caps.data?.code_chat_default_model}{" "}
+                  {capsForToolUse.currentModel || caps.data?.chat_default_model}{" "}
                 {" "}
                 •{" "}
diff --git a/refact-agent/gui/src/components/IntegrationsView/IntegrationDeletePopover.tsx b/refact-agent/gui/src/components/DeletePopover/DeletePopover.tsx
rename from refact-agent/gui/src/components/IntegrationsView/IntegrationDeletePopover.tsx
rename to refact-agent/gui/src/components/DeletePopover/DeletePopover.tsx
-export type IntegrationDeletePopoverProps = {
-  isApplying: boolean;
-  isDeletingIntegration: boolean;
-  integrationName: string;
-  integrationConfigPath: string;
-  handleDeleteIntegration: (path: string, name: string) => void;
-};
+export type DeletePopoverProps = {
+  isDisabled: boolean;
+  isDeleting: boolean;
+  itemName: string;
+  deleteBy: string;
+  handleDelete: (deleteBy: string) => void;
 };
 
-export const IntegrationDeletePopover: FC = ({
-  isApplying,
-  isDeletingIntegration,
-  integrationName,
-  integrationConfigPath,
-  handleDeleteIntegration,
+export const DeletePopover: FC = ({
+  deleteBy,
+  itemName,
+  handleDelete,
+  isDeleting,
+  isDisabled,
 }) => {
   return (
     
@@ -35,18 +35,12 @@ export const IntegrationDeletePopover: FC = ({
           variant="outline"
           type="button"
           size="2"
-          title={"Delete configuration data of this integration"}
-          className={classNames(
-            {
-              [styles.disabledButton]: isDeletingIntegration || isApplying,
-            },
-            // styles.button,
-          )}
-          disabled={isDeletingIntegration || isApplying}
+          title={"Delete configuration data"}
+          className={classNames({
+            [styles.disabledButton]: isDeleting || isDisabled,
+          })}
+          disabled={isDeleting || isDisabled}
         >
-          {/* {isDeletingIntegration
-            ? "Deleting configuration..."
-            : "Delete configuration"} */}
           
         
       
@@ -59,7 +53,7 @@ export const IntegrationDeletePopover: FC = ({
                   Destructive action
                 
                 
-                  Do you really want to delete {integrationName}
+                  Do you really want to delete {itemName}
                   's configuration data?
                 
               
@@ -70,12 +64,7 @@ export const IntegrationDeletePopover: FC = ({
                     size="2"
                     variant="solid"
                     color="red"
-                    onClick={() =>
-                      handleDeleteIntegration(
-                        integrationConfigPath,
-                        integrationName,
-                      )
-                    }
+                    onClick={() => handleDelete(deleteBy)}
                   >
                     Delete
                   
diff --git a/refact-agent/gui/src/components/DeletePopover/index.ts b/refact-agent/gui/src/components/DeletePopover/index.ts
new file mode 100644
index 000000000..23144d8f9
--- /dev/null
+++ b/refact-agent/gui/src/components/DeletePopover/index.ts
@@ -0,0 +1 @@
+export { DeletePopover } from "./DeletePopover";
diff --git a/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.module.css b/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.module.css
index 78c7c7659..8dc29dadb 100644
--- a/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.module.css
+++ b/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.module.css
@@ -27,11 +27,3 @@
   max-width: 30px;
   object-fit: cover;
 }
-
-.availabilitySwitch {
-  cursor: pointer;
-}
-
-.disabledAvailabilitySwitch {
-  cursor: not-allowed;
-}
diff --git a/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.tsx b/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.tsx
index e43d18b34..c03c03897 100644
--- a/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.tsx
+++ b/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.tsx
@@ -1,7 +1,7 @@
-import { FC, MouseEventHandler } from "react";
+import type { FC, MouseEventHandler } from "react";
 import classNames from "classnames";
 
-import { Badge, Card, Flex, Text } from "@radix-ui/themes";
+import { Card, Flex, Text } from "@radix-ui/themes";
 import { useAppSelector } from "../../../hooks";
 import { useUpdateIntegration } from "./useUpdateIntegration";
 
@@ -15,6 +15,7 @@ import { formatIntegrationIconPath } from "../../../utils/formatIntegrationIconP
 import { getIntegrationInfo } from "../../../utils/getIntegrationInfo";
 
 import styles from "./IntegrationCard.module.css";
+import { OnOffSwitch } from "../../OnOffSwitch/OnOffSwitch";
 
 type IntegrationCardProps = {
   integration:
@@ -54,11 +55,6 @@ export const IntegrationCard: FC = ({
     void updateIntegrationAvailability();
   };
 
-  const switches = [
-    { label: "On", leftRadius: true },
-    { label: "Off", rightRadius: true },
-  ];
-
   return (
      = ({
             {displayName}
           
           {!isNotConfigured && (
-            
-              {switches.map(({ label, leftRadius }) => {
-                const isOn = label === "On";
-                const isActive =
-                  isOn === integrationAvailability.on_your_laptop;
-
-                return (
-                  
-                    {label}
-                  
-                );
-              })}
-            
+            
           )}
         
       
diff --git a/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/ErrorState.tsx b/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/ErrorState.tsx
index d5f3b474c..909355bc2 100644
--- a/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/ErrorState.tsx
+++ b/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/ErrorState.tsx
@@ -1,14 +1,14 @@
 import { ExclamationTriangleIcon } from "@radix-ui/react-icons";
 import { Badge, Button, Flex, Text } from "@radix-ui/themes";
 import { FC } from "react";
-import { IntegrationDeletePopover } from "../IntegrationDeletePopover";
 import { Integration } from "../../../services/refact";
 import { useAppSelector, useEventsBusForIDE } from "../../../hooks";
 import { selectConfig } from "../../../features/Config/configSlice";
+import { DeletePopover } from "../../DeletePopover";
 
 type ErrorStateProps = {
   integration: Integration;
-  onDelete: (path: string, name: string) => void;
+  onDelete: (path: string) => void;
   isApplying: boolean;
   isDeletingIntegration: boolean;
 };
@@ -51,12 +51,12 @@ export const ErrorState: FC = ({
             Open {integr_name}.yaml
           
         )}
-        
       
     
diff --git a/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/FormAvailabilityAndDelete.tsx b/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/FormAvailabilityAndDelete.tsx
index 6ad1770a0..065fcc092 100644
--- a/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/FormAvailabilityAndDelete.tsx
+++ b/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/FormAvailabilityAndDelete.tsx
@@ -4,7 +4,7 @@ import styles from "./IntegrationForm.module.css";
 import { FC } from "react";
 import { Integration } from "../../../services/refact";
 import { IntegrationAvailability } from "./IntegrationAvailability";
-import { IntegrationDeletePopover } from "../IntegrationDeletePopover";
+import { DeletePopover } from "../../DeletePopover";
 
 type FormAvailabilityAndDeleteProps = {
   integration: Integration;
@@ -12,7 +12,7 @@ type FormAvailabilityAndDeleteProps = {
   isApplying: boolean;
   isDeletingIntegration: boolean;
   handleAvailabilityChange: (fieldName: string, value: boolean) => void;
-  onDelete: (path: string, name: string) => void;
+  onDelete: (path: string) => void;
 };
 
 export const FormAvailabilityAndDelete: FC = ({
@@ -44,12 +44,12 @@ export const FormAvailabilityAndDelete: FC = ({
             />
           ))}
       
-      
     
   );
diff --git a/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/IntegrationForm.tsx b/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/IntegrationForm.tsx
index e64f81a60..9dd353a05 100644
--- a/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/IntegrationForm.tsx
+++ b/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/IntegrationForm.tsx
@@ -36,7 +36,7 @@ type IntegrationFormProps = {
   MCPArguments: string[];
   MCPEnvironmentVariables: Record;
   handleSubmit: (event: FormEvent) => void;
-  handleDeleteIntegration: (path: string, name: string) => void;
+  handleDeleteIntegration: (path: string) => void;
   handleChange: (event: FormEvent) => void;
   onSchema: (schema: Integration["integr_schema"]) => void;
   onValues: (values: Integration["integr_values"]) => void;
diff --git a/refact-agent/gui/src/components/IntegrationsView/IntegrationsView.tsx b/refact-agent/gui/src/components/IntegrationsView/IntegrationsView.tsx
index c78aaf673..58d3a75c6 100644
--- a/refact-agent/gui/src/components/IntegrationsView/IntegrationsView.tsx
+++ b/refact-agent/gui/src/components/IntegrationsView/IntegrationsView.tsx
@@ -110,9 +110,7 @@ export const IntegrationsView: FC = ({
       
          void handleSubmit(event)}
-          handleDeleteIntegration={(path, name) =>
-            void handleDeleteIntegration(path, name)
-          }
+          handleDeleteIntegration={(path) => void handleDeleteIntegration(path)}
           integrationPath={currentIntegration.integr_config_path}
           isApplying={isApplyingIntegrationForm}
           isDeletingIntegration={isDeletingIntegration}
diff --git a/refact-agent/gui/src/components/IntegrationsView/hooks/useIntegrations.ts b/refact-agent/gui/src/components/IntegrationsView/hooks/useIntegrations.ts
index db25974f9..aaabbea02 100644
--- a/refact-agent/gui/src/components/IntegrationsView/hooks/useIntegrations.ts
+++ b/refact-agent/gui/src/components/IntegrationsView/hooks/useIntegrations.ts
@@ -620,8 +620,8 @@ export const useIntegrations = ({
   );
 
   const handleDeleteIntegration = useCallback(
-    async (configurationPath: string, integrationName: string) => {
-      // if (!currentIntegration) return;
+    async (configurationPath: string) => {
+      if (!currentIntegration) return;
       setIsDeletingIntegration(true);
       const response = await deleteIntegrationTrigger(configurationPath);
       debugIntegrations("[DEBUG]: response: ", response);
@@ -632,7 +632,7 @@ export const useIntegrations = ({
       dispatch(
         setInformation(
           `${toPascalCase(
-            integrationName,
+            currentIntegration.integr_name,
           )} integration's configuration was deleted successfully!`,
         ),
       );
@@ -642,7 +642,7 @@ export const useIntegrations = ({
         clearTimeout(timeoutId);
       }, 1200);
     },
-    [dispatch, deleteIntegrationTrigger, handleFormReturn],
+    [currentIntegration, dispatch, deleteIntegrationTrigger, handleFormReturn],
   );
 
   const handleIntegrationFormChange = useCallback(
diff --git a/refact-agent/gui/src/components/OnOffSwitch/OnOffSwitch.module.css b/refact-agent/gui/src/components/OnOffSwitch/OnOffSwitch.module.css
new file mode 100644
index 000000000..9bd21d870
--- /dev/null
+++ b/refact-agent/gui/src/components/OnOffSwitch/OnOffSwitch.module.css
@@ -0,0 +1,12 @@
+.switch {
+  cursor: pointer;
+}
+
+.disabled {
+  cursor: not-allowed;
+}
+
+.unavailable {
+  cursor: not-allowed;
+  opacity: 0.35;
+}
diff --git a/refact-agent/gui/src/components/OnOffSwitch/OnOffSwitch.tsx b/refact-agent/gui/src/components/OnOffSwitch/OnOffSwitch.tsx
new file mode 100644
index 000000000..359ef6095
--- /dev/null
+++ b/refact-agent/gui/src/components/OnOffSwitch/OnOffSwitch.tsx
@@ -0,0 +1,61 @@
+import classNames from "classnames";
+import { Badge, Flex } from "@radix-ui/themes";
+
+import styles from "./OnOffSwitch.module.css";
+import React, { MouseEventHandler } from "react";
+
+const switches = [
+  { label: "On", leftRadius: true },
+  { label: "Off", rightRadius: true },
+];
+
+export type OnOffSwitchProps = {
+  isEnabled: boolean;
+  isUnavailable?: boolean;
+  isUpdating?: boolean;
+  handleClick: MouseEventHandler;
+};
+
+export const OnOffSwitch: React.FC = ({
+  isEnabled,
+  isUnavailable = false,
+  isUpdating = false,
+  handleClick,
+}) => {
+  return (
+    
+      {switches.map(({ label, leftRadius }) => {
+        const isOn = label === "On";
+        const isActive = isOn === isEnabled;
+
+        return (
+          
+            {label}
+          
+        );
+      })}
+    
+  );
+};
diff --git a/refact-agent/gui/src/components/OnOffSwitch/index.ts b/refact-agent/gui/src/components/OnOffSwitch/index.ts
new file mode 100644
index 000000000..341a3aba9
--- /dev/null
+++ b/refact-agent/gui/src/components/OnOffSwitch/index.ts
@@ -0,0 +1 @@
+export { OnOffSwitch } from "./OnOffSwitch";
diff --git a/refact-agent/gui/src/components/Toolbar/Dropdown.tsx b/refact-agent/gui/src/components/Toolbar/Dropdown.tsx
index c3702f802..09ce17d43 100644
--- a/refact-agent/gui/src/components/Toolbar/Dropdown.tsx
+++ b/refact-agent/gui/src/components/Toolbar/Dropdown.tsx
@@ -6,7 +6,6 @@ import {
 } from "../../features/Config/configSlice";
 import { useTourRefs } from "../../features/Tour";
 import {
-  useConfig,
   useEventsBusForIDE,
   useGetUser,
   useLogout,
@@ -27,6 +26,7 @@ import {
   HamburgerMenuIcon,
   DiscordLogoIcon,
   QuestionMarkCircledIcon,
+  GearIcon,
 } from "@radix-ui/react-icons";
 import { clearHistory } from "../../features/History/historySlice";
 import { KnowledgeListPage } from "../../features/Pages/pagesSlice";
@@ -41,6 +41,7 @@ export type DropdownNavigationOptions =
   | "restart tour"
   | "login page"
   | "integrations"
+  | "providers"
   | KnowledgeListPage["name"]
   | "";
 
@@ -79,7 +80,6 @@ export const Dropdown: React.FC = ({
   const dispatch = useAppDispatch();
   const { maxAgentUsageAmount, currentAgentUsage } = useAgentUsage();
   const logout = useLogout();
-  const { addressURL } = useConfig();
   const knowledgeEnabled = useAppSelector(selectKnowledgeFeature);
   const { startPollingForUser } = useAgentUsage();
 
@@ -87,8 +87,7 @@ export const Dropdown: React.FC = ({
   const discordUrl = "https://www.smallcloud.ai/discord";
   const accountLink = linkForAccount(host);
   const openUrl = useOpenUrl();
-  const { openBringYourOwnKeyFile, openCustomizationFile, openPrivacyFile } =
-    useEventsBusForIDE();
+  const { openCustomizationFile, openPrivacyFile } = useEventsBusForIDE();
 
   const handleChatHistoryCleanUp = () => {
     dispatch(clearHistory());
@@ -188,6 +187,10 @@ export const Dropdown: React.FC = ({
            Set up Agent Integrations
         
 
+         handleNavigation("providers")}>
+           Configure Providers
+        
+
         {knowledgeEnabled && (
            handleNavigation("knowledge list")}
@@ -220,16 +223,6 @@ export const Dropdown: React.FC = ({
           Edit privacy.yaml
         
 
-        {addressURL?.endsWith(".yaml") && (
-           {
-              void openBringYourOwnKeyFile();
-            }}
-          >
-            Edit Bring Your Own Key
-          
-        )}
-
         
 
          handleNavigation("restart tour")}>
diff --git a/refact-agent/gui/src/components/Toolbar/Toolbar.tsx b/refact-agent/gui/src/components/Toolbar/Toolbar.tsx
index df7b1a81b..c8ad01c86 100644
--- a/refact-agent/gui/src/components/Toolbar/Toolbar.tsx
+++ b/refact-agent/gui/src/components/Toolbar/Toolbar.tsx
@@ -141,6 +141,13 @@ export const Toolbar = ({ activeTab }: ToolbarProps) => {
           success: true,
           error_message: "",
         });
+      } else if (to === "providers") {
+        dispatch(push({ name: "providers page" }));
+        void sendTelemetryEvent({
+          scope: `openProviders`,
+          success: true,
+          error_message: "",
+        });
       } else if (to === "chat") {
         dispatch(popBackTo({ name: "history" }));
         dispatch(push({ name: "chat" }));
diff --git a/refact-agent/gui/src/components/UsageCounter/UsageCounter.tsx b/refact-agent/gui/src/components/UsageCounter/UsageCounter.tsx
index 89d26c095..ac8817713 100644
--- a/refact-agent/gui/src/components/UsageCounter/UsageCounter.tsx
+++ b/refact-agent/gui/src/components/UsageCounter/UsageCounter.tsx
@@ -66,31 +66,45 @@ const DefaultHoverCard: React.FC<{
   outputTokens: number;
 }> = ({ inputTokens, outputTokens }) => {
   const { currentThreadUsage } = useUsageCounter();
+  if (!currentThreadUsage) return null;
 
   return (
     
       
         Tokens spent per chat thread:
       
-      
-      {currentThreadUsage?.cache_read_input_tokens !== undefined && (
-        
+      {inputTokens !== 0 && (
+        
       )}
-      {currentThreadUsage?.cache_creation_input_tokens !== undefined && (
-        
+      {currentThreadUsage.cache_read_input_tokens !== undefined &&
+        currentThreadUsage.cache_read_input_tokens !== 0 && (
+          
+        )}
+      {currentThreadUsage.cache_creation_input_tokens !== undefined &&
+        currentThreadUsage.cache_creation_input_tokens !== 0 && (
+          
+        )}
+      {outputTokens !== 0 && (
+        
       )}
-      
-      {currentThreadUsage?.completion_tokens_details && (
-        
+      {currentThreadUsage.completion_tokens_details && (
+        <>
+          {currentThreadUsage.completion_tokens_details.reasoning_tokens !==
+            0 && (
+            
+          )}
+        
       )}
     
   );
@@ -111,18 +125,22 @@ const InlineHoverTriggerContent: React.FC<{ messageTokens: number }> = ({
 
 const DefaultHoverTriggerContent: React.FC<{
   inputTokens: number;
-  outputValue: string;
-}> = ({ inputTokens, outputValue }) => {
+  outputTokens: number;
+}> = ({ inputTokens, outputTokens }) => {
   return (
     <>
-      
-        
-        {formatNumberToFixed(inputTokens)}
-      
-      
-        
-        {outputValue}
-      
+      {inputTokens !== 0 && (
+        
+          
+          {formatNumberToFixed(inputTokens)}
+        
+      )}
+      {outputTokens !== 0 && (
+        
+          
+          {formatNumberToFixed(outputTokens)}
+        
+      )}
     
   );
 };
@@ -154,7 +172,10 @@ export const UsageCounter: React.FC = ({
     usage: currentThreadUsage,
     keys: ["completion_tokens"],
   });
-  const outputValue = formatNumberToFixed(outputTokens);
+
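+  // The default counter hides itself until the thread has actually consumed
+  // tokens; the inline variant always renders.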
+  const shouldUsageBeHidden = useMemo(() => {
+    return !isInline && inputTokens === 0 && outputTokens === 0;
+  }, [outputTokens, inputTokens, isInline]);
 
   useEffectOnce(() => {
     const handleScroll = (event: WheelEvent) => {
@@ -172,6 +193,8 @@ export const UsageCounter: React.FC = ({
     };
   });
 
+  if (shouldUsageBeHidden) return null;
+
   return (
     
       
@@ -187,7 +210,7 @@ export const UsageCounter: React.FC = ({
           ) : (
             
           )}
         
diff --git a/refact-agent/gui/src/events/index.ts b/refact-agent/gui/src/events/index.ts
index b6f987208..1fd4369c7 100644
--- a/refact-agent/gui/src/events/index.ts
+++ b/refact-agent/gui/src/events/index.ts
@@ -76,6 +76,7 @@ export {
   ideIsChatReady,
   ideToolCall,
   ideToolCallResponse,
+  ideSetCodeCompletionModel,
 } from "../hooks/useEventBusForIDE";
 
 export { ideAttachFileToChat } from "../hooks/useEventBusForApp";
diff --git a/refact-agent/gui/src/events/setup.ts b/refact-agent/gui/src/events/setup.ts
index e588fb61e..2fa1b99f1 100644
--- a/refact-agent/gui/src/events/setup.ts
+++ b/refact-agent/gui/src/events/setup.ts
@@ -21,20 +21,12 @@ export interface EnterpriseHost {
   apiKey: string;
 }
 
-export interface BringYourOwnKey {
-  type: "bring-your-own-key";
-}
-
 export interface ActionFromSetup {
   type: EVENT_NAMES_FROM_SETUP;
   payload?: Record;
 }
 
-export type HostSettings =
-  | CloudHost
-  | SelfHost
-  | EnterpriseHost
-  | BringYourOwnKey;
+export type HostSettings = CloudHost | SelfHost | EnterpriseHost;
 
 export function isActionFromSetup(action: unknown): action is ActionFromSetup {
   if (!action) return false;
diff --git a/refact-agent/gui/src/features/App.tsx b/refact-agent/gui/src/features/App.tsx
index aac76d736..218af159a 100644
--- a/refact-agent/gui/src/features/App.tsx
+++ b/refact-agent/gui/src/features/App.tsx
@@ -28,6 +28,7 @@ import { Tab } from "../components/Toolbar/Toolbar";
 import { PageWrapper } from "../components/PageWrapper";
 import { ThreadHistory } from "./ThreadHistory";
 import { Integrations } from "./Integrations";
+import { Providers } from "./Providers";
 import { UserSurvey } from "./UserSurvey";
 import { integrationsApi } from "../services/refact";
 import { KnowledgeList } from "./Knowledge";
@@ -202,6 +203,13 @@ export const InnerApp: React.FC = ({ style }: AppProps) => {
             handlePaddingShift={handlePaddingShift}
           />
         )}
+        {page.name === "providers page" && (
+          
+        )}
         {page.name === "thread history page" && (
            {
 
     await user.type(textarea, "hello");
 
-    await waitFor(() =>
-      app.queryByText(STUB_CAPS_RESPONSE.code_chat_default_model),
-    );
+    await waitFor(() => app.queryByText(STUB_CAPS_RESPONSE.chat_default_model));
 
     await user.keyboard("{Enter}");
 
@@ -294,7 +292,7 @@ describe("Chat", () => {
     // });
     await waitFor(() =>
       expect(
-        app.queryByText(STUB_CAPS_RESPONSE.code_chat_default_model),
+        app.queryByText(STUB_CAPS_RESPONSE.chat_default_model),
       ).not.toBeNull(),
     );
 
diff --git a/refact-agent/gui/src/features/Chat/Thread/actions.ts b/refact-agent/gui/src/features/Chat/Thread/actions.ts
index bbc08eca7..662b2742a 100644
--- a/refact-agent/gui/src/features/Chat/Thread/actions.ts
+++ b/refact-agent/gui/src/features/Chat/Thread/actions.ts
@@ -35,7 +35,11 @@ import { scanFoDuplicatesWith, takeFromEndWhile } from "../../../utils";
 import { debugApp } from "../../../debugConfig";
 import { ChatHistoryItem } from "../../History/historySlice";
 import { ideToolCallResponse } from "../../../hooks/useEventBusForIDE";
-import { capsApi } from "../../../services/refact";
+import {
+  capsApi,
+  DetailMessageWithErrorType,
+  isDetailMessage,
+} from "../../../services/refact";
 
 export const newChatAction = createAction | undefined>(
   "chatThread/new",
@@ -163,10 +167,6 @@ export const setIncreaseMaxTokens = createAction(
   "chatThread/setIncreaseMaxTokens",
 );
 
-export const setThreadPaused = createAction(
-  "chatThread/setThreadPaused",
-);
-
 // TODO: This is the circular dep when imported from hooks :/
 const createAppAsyncThunk = createAsyncThunk.withTypes<{
   state: RootState;
@@ -200,7 +200,7 @@ export const chatGenerateTitleThunk = createAppAsyncThunk<
   const caps = await thunkAPI
     .dispatch(capsApi.endpoints.getCaps.initiate(undefined))
     .unwrap();
-  const model = caps.code_chat_default_model;
+  const model = caps.chat_default_model;
   const messagesForLsp = formatMessagesForLsp([
     ...messagesToSend,
     {
@@ -355,9 +355,10 @@ export const chatAskQuestionThunk = createAppAsyncThunk<
       mode: realMode,
       boost_reasoning: boostReasoning,
     })
-      .then((response) => {
+      .then(async (response) => {
         if (!response.ok) {
-          return Promise.reject(new Error(response.statusText));
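+          // A non-OK response is expected to carry a JSON detail body; reject
+          // with it so the catch below can classify the error.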
+          const responseData = (await response.json()) as unknown;
+          return Promise.reject(responseData);
         }
         const reader = response.body?.getReader();
         if (!reader) return;
@@ -374,12 +375,22 @@ export const chatAskQuestionThunk = createAppAsyncThunk<
         };
         return consumeStream(reader, thunkAPI.signal, onAbort, onChunk);
       })
-      .catch((err: Error) => {
+      .catch((err: unknown) => {
         // console.log("Catch called");
+        const isError = err instanceof Error;
         thunkAPI.dispatch(doneStreaming({ id: chatId }));
-        thunkAPI.dispatch(chatError({ id: chatId, message: err.message }));
         thunkAPI.dispatch(fixBrokenToolMessages({ id: chatId }));
-        return thunkAPI.rejectWithValue(err.message);
+
+        const errorObject: DetailMessageWithErrorType = {
+          detail: isError
+            ? err.message
+            : isDetailMessage(err)
+              ? err.detail
+              : (err as string),
+          errorType: isError ? "CHAT" : "GLOBAL",
+        };
+
+        return thunkAPI.rejectWithValue(errorObject);
       })
       .finally(() => {
         thunkAPI.dispatch(setMaxNewTokens(DEFAULT_MAX_NEW_TOKENS));
diff --git a/refact-agent/gui/src/features/Chat/Thread/reducer.ts b/refact-agent/gui/src/features/Chat/Thread/reducer.ts
index 5510485f3..b12f9be70 100644
--- a/refact-agent/gui/src/features/Chat/Thread/reducer.ts
+++ b/refact-agent/gui/src/features/Chat/Thread/reducer.ts
@@ -233,6 +233,7 @@ export const chatReducer = createReducer(initialState, (builder) => {
   builder.addCase(doneStreaming, (state, action) => {
     if (state.thread.id !== action.payload.id) return state;
     state.streaming = false;
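+    // Finishing the stream also clears the waiting flag, so the UI does not
+    // keep showing a pending state.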
+    state.waiting_for_response = false;
     state.thread.read = true;
     state.prevent_send = false;
   });
@@ -428,13 +429,13 @@ export const chatReducer = createReducer(initialState, (builder) => {
   builder.addMatcher(
     capsApi.endpoints.getCaps.matchFulfilled,
     (state, action) => {
-      const defaultModel = action.payload.code_chat_default_model;
+      const defaultModel = action.payload.chat_default_model;
 
       const model = state.thread.model || defaultModel;
-      if (!(model in action.payload.code_chat_models)) return;
+      if (!(model in action.payload.chat_models)) return;
 
       const currentModelMaximumContextTokens =
-        action.payload.code_chat_models[model].n_ctx;
+        action.payload.chat_models[model].n_ctx;
 
       state.thread.currentMaximumContextTokens =
         currentModelMaximumContextTokens;
diff --git a/refact-agent/gui/src/features/Chat/Thread/utils.ts b/refact-agent/gui/src/features/Chat/Thread/utils.ts
index 827868b6f..80910f5d8 100644
--- a/refact-agent/gui/src/features/Chat/Thread/utils.ts
+++ b/refact-agent/gui/src/features/Chat/Thread/utils.ts
@@ -34,6 +34,7 @@ import {
   isUserResponse,
   ThinkingBlock,
   isToolCallMessage,
+  Usage,
 } from "../../../services/refact";
 import { parseOrElse } from "../../../utils";
 import { type LspChatMessage } from "../../../services/refact";
@@ -232,13 +233,32 @@ export function formatChatResponse(
 
   const currentUsage = response.usage;
 
-  if (currentUsage && response.choices.length === 0) {
+  if (currentUsage) {
     const lastAssistantIndex = lastIndexOf(messages, isAssistantMessage);
     if (lastAssistantIndex === -1) return messages;
 
+    const lastAssistantMessage = messages[lastAssistantIndex];
+    if (!isAssistantMessage(lastAssistantMessage)) return messages;
+
+    const maybeLastAssistantMessageUsage = lastAssistantMessage.usage;
+    let usageToStore = currentUsage;
+
+    if (
+      maybeLastAssistantMessageUsage &&
+      Object.entries(currentUsage).every(
+        ([key, value]) =>
+          maybeLastAssistantMessageUsage[key as keyof Usage] === value,
+      )
+    ) {
+      usageToStore = { ...maybeLastAssistantMessageUsage, ...currentUsage };
+    }
+
     return messages.map((message, index) =>
       index === lastAssistantIndex
-        ? { ...message, usage: currentUsage }
+        ? {
+            ...message,
+            usage: usageToStore,
+          }
         : message,
     );
   }
@@ -673,8 +693,7 @@ export function consumeStream(
       const str = decoder.decode(value);
       const maybeError = checkForDetailMessage(str);
       if (maybeError) {
-        const error = new Error(maybeError.detail);
-        throw error;
+        return Promise.reject(maybeError);
       }
     }
 
@@ -708,7 +727,9 @@ export function consumeStream(
 
       const maybeJsonString = delta.substring(6);
 
-      if (maybeJsonString === "[DONE]") return Promise.resolve();
+      if (maybeJsonString === "[DONE]") {
+        return Promise.resolve();
+      }
 
       if (maybeJsonString === "[ERROR]") {
         const errorMessage = "error from lsp";
@@ -726,7 +747,7 @@ export function consumeStream(
         const error = new Error(errorMessage);
         // eslint-disable-next-line no-console
         console.error(error);
-        throw error;
+        return Promise.reject(maybeErrorData);
       }
 
       const fallback = {};
diff --git a/refact-agent/gui/src/features/Login/LoginPage.tsx b/refact-agent/gui/src/features/Login/LoginPage.tsx
index e0bd8b27f..3bdefeb5d 100644
--- a/refact-agent/gui/src/features/Login/LoginPage.tsx
+++ b/refact-agent/gui/src/features/Login/LoginPage.tsx
@@ -200,32 +200,6 @@ export const LoginPage: React.FC = () => {
             
           
         
-        
-          Bring your own key
-          
-            
-              
-                
-                  Connect to any OpenAI or Huggingface style server.
-                  Separate endpoints and keys for chat, completion, and
-                  embedding.
-
-
-
-
-
); diff --git a/refact-agent/gui/src/features/Pages/pagesSlice.ts b/refact-agent/gui/src/features/Pages/pagesSlice.ts index b9270d13b..6a36242d8 100644 --- a/refact-agent/gui/src/features/Pages/pagesSlice.ts +++ b/refact-agent/gui/src/features/Pages/pagesSlice.ts @@ -38,6 +38,10 @@ export interface LoginPage { name: "login page"; } +export interface ProvidersPage { + name: "providers page"; +} + export interface IntegrationsSetupPage { name: "integrations page"; projectPath?: string; @@ -61,6 +65,7 @@ export type Page = | DocumentationSettingsPage | ChatThreadHistoryPage | IntegrationsSetupPage + | ProvidersPage | KnowledgeListPage | LoginPage; diff --git a/refact-agent/gui/src/features/Providers/ProviderCard/ProviderCard.module.css b/refact-agent/gui/src/features/Providers/ProviderCard/ProviderCard.module.css new file mode 100644 index 000000000..4a820f1a8 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderCard/ProviderCard.module.css @@ -0,0 +1,10 @@ +.providerCard { + cursor: pointer; + user-select: none; + transition: opacity 0.15s ease-in-out; +} + +.providerCardDisabled { + opacity: 0.5; + cursor: not-allowed; +} diff --git a/refact-agent/gui/src/features/Providers/ProviderCard/ProviderCard.tsx b/refact-agent/gui/src/features/Providers/ProviderCard/ProviderCard.tsx new file mode 100644 index 000000000..8856ea871 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderCard/ProviderCard.tsx @@ -0,0 +1,64 @@ +import React from "react"; +import { Card, Flex, Heading } from "@radix-ui/themes"; + +import { OnOffSwitch } from "../../../components/OnOffSwitch/OnOffSwitch"; +import { iconsMap } from "../icons/iconsMap"; + +import type { ConfiguredProvidersResponse } from "../../../services/refact"; + +import { getProviderName } from "../getProviderName"; +import { useProviderCard } from "./useProviderCard"; + +import styles from "./ProviderCard.module.css"; +import { useUpdateProvider } from "../useUpdateProvider"; +import classNames from "classnames"; + +export type ProviderCardProps = { + provider: ConfiguredProvidersResponse["providers"][number]; + setCurrentProvider: ( + provider: ConfiguredProvidersResponse["providers"][number], + ) => void; +}; + +export const ProviderCard: React.FC = ({ + provider, + setCurrentProvider, +}) => { + const { isUpdatingEnabledState } = useUpdateProvider({ + provider, + }); + + const { handleClickOnProvider, handleSwitchClick } = useProviderCard({ + provider, + setCurrentProvider, + }); + + return ( + + + + {iconsMap[provider.name]} + + {getProviderName(provider)} + + + {!provider.readonly && ( + + + + )} + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderCard/index.ts b/refact-agent/gui/src/features/Providers/ProviderCard/index.ts new file mode 100644 index 000000000..705ad4825 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderCard/index.ts @@ -0,0 +1 @@ +export { ProviderCard, type ProviderCardProps } from "./ProviderCard"; diff --git a/refact-agent/gui/src/features/Providers/ProviderCard/useProviderCard.ts b/refact-agent/gui/src/features/Providers/ProviderCard/useProviderCard.ts new file mode 100644 index 000000000..45ee65cfd --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderCard/useProviderCard.ts @@ -0,0 +1,32 @@ +import { type MouseEventHandler, useCallback } from "react"; +import { ProviderCardProps } from "./ProviderCard"; +import { useUpdateProvider } from "../useUpdateProvider"; + +export function useProviderCard({ + provider, + setCurrentProvider, +}: { + 
provider: ProviderCardProps["provider"]; + setCurrentProvider: ProviderCardProps["setCurrentProvider"]; +}) { + const { updateProviderEnabledState, isUpdatingEnabledState } = + useUpdateProvider({ provider }); + + const handleClickOnProvider = useCallback(() => { + if (isUpdatingEnabledState) return; + + setCurrentProvider(provider); + }, [setCurrentProvider, provider, isUpdatingEnabledState]); + + const handleSwitchClick: MouseEventHandler = (event) => { + if (isUpdatingEnabledState) return; + + event.stopPropagation(); + void updateProviderEnabledState(); + }; + + return { + handleClickOnProvider, + handleSwitchClick, + }; +} diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/FormFields.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/FormFields.tsx new file mode 100644 index 000000000..4df26f5c3 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/FormFields.tsx @@ -0,0 +1,75 @@ +import { FC } from "react"; +import classNames from "classnames"; + +import { Flex, Select, TextField } from "@radix-ui/themes"; +import { toPascalCase } from "../../../utils/toPascalCase"; + +import type { Provider } from "../../../services/refact"; + +import styles from "./ProviderForm.module.css"; + +export type FormFieldsProps = { + providerData: Provider; + fields: Record; + onChange: (updatedProviderData: Provider) => void; +}; + +export const FormFields: FC = ({ + providerData, + fields, + onChange, +}) => { + return Object.entries(fields).map(([key, value], idx) => { + if (key === "endpoint_style" && providerData.name === "custom") { + const availableOptions: Provider["endpoint_style"][] = ["openai", "hf"]; + const displayValues = ["OpenAI", "HuggingFace"]; + return ( + + {toPascalCase(key)} + + onChange({ ...providerData, endpoint_style: value }) + } + disabled={providerData.readonly} + > + + + {availableOptions.map((option, idx) => ( + + {displayValues[idx]} + + ))} + + + + ); + } + + if (key === "endpoint_style") return null; + + if ( + !providerData.supports_completion && + (key === "completion_default_model" || key === "completion_endpoint") + ) { + return null; + } + + return ( + + + + onChange({ ...providerData, [key]: event.target.value }) + } + className={classNames({ + [styles.disabledField]: providerData.readonly, + })} + disabled={providerData.readonly} + /> + + ); + }); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderForm.module.css b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderForm.module.css new file mode 100644 index 000000000..92073748c --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderForm.module.css @@ -0,0 +1,19 @@ +.button { + transition: all 0.1s ease-in-out; +} + +.extraButton { + width: 100%; + padding: 5px 0; +} +.disabledSwitch { + span { + background-color: rgba(255, 255, 255, 0.3); + } +} + +.disabledField { + input { + cursor: not-allowed; + } +} diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderForm.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderForm.tsx new file mode 100644 index 000000000..86e4a1b5d --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderForm.tsx @@ -0,0 +1,123 @@ +import React from "react"; +import classNames from "classnames"; +import { Button, Flex, Separator, Switch } from "@radix-ui/themes"; + +import { FormFields } from "./FormFields"; +import { Spinner } from "../../../components/Spinner"; + +import { useProviderForm } from "./useProviderForm"; +import type { 
Provider, SimplifiedProvider } from "../../../services/refact"; + +import { toPascalCase } from "../../../utils/toPascalCase"; +import { aggregateProviderFields } from "./utils"; + +import styles from "./ProviderForm.module.css"; +import { ProviderModelsList } from "./ProviderModelsList/ProviderModelsList"; + +export type ProviderFormProps = { + currentProvider: SimplifiedProvider< + "name" | "enabled" | "readonly" | "supports_completion" + >; + isProviderConfigured: boolean; + isSaving: boolean; + handleDiscardChanges: () => void; + handleSaveChanges: (updatedProviderData: Provider) => void; +}; + +export const ProviderForm: React.FC = ({ + currentProvider, + isProviderConfigured, + isSaving, + handleDiscardChanges, + handleSaveChanges, +}) => { + const { + areShowingExtraFields, + formValues, + handleFormValuesChange, + isProviderLoadedSuccessfully, + setAreShowingExtraFields, + shouldSaveButtonBeDisabled, + } = useProviderForm({ providerName: currentProvider.name }); + + if (!isProviderLoadedSuccessfully || !formValues) return ; + + const { extraFields, importantFields } = aggregateProviderFields(formValues); + + return ( + + + + + + handleFormValuesChange({ ...formValues, ["enabled"]: checked }) + } + /> + + + + + + + {areShowingExtraFields && ( + + + + )} + + + + {isProviderConfigured && ( + + )} + + + + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ModelCard.module.css b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ModelCard.module.css new file mode 100644 index 000000000..e61fee90d --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ModelCard.module.css @@ -0,0 +1,5 @@ +.disabledCard { + opacity: 0.5; + pointer-events: none; + transition: 0.15s ease-in-out; +} diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ModelCard.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ModelCard.tsx new file mode 100644 index 000000000..e5b07fb2f --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ModelCard.tsx @@ -0,0 +1,167 @@ +import { useCallback, useMemo, type FC } from "react"; +import classNames from "classnames"; +import { + Badge, + Card, + DropdownMenu, + Flex, + IconButton, + Text, +} from "@radix-ui/themes"; +import { DotsVerticalIcon } from "@radix-ui/react-icons"; + +import { ModelCardPopup } from "./components/ModelCardPopup"; +import { useModelDialogState } from "./hooks/useModelDialogState"; + +import type { ModelType, SimplifiedModel } from "../../../../services/refact"; + +import styles from "./ModelCard.module.css"; +import { useEventsBusForIDE } from "../../../../hooks"; + +export type ModelCardProps = { + model: SimplifiedModel; + providerName: string; + modelType: ModelType; + isReadonlyProvider: boolean; + currentModelNames: string[]; +}; + +/** + * Card component that displays model information and provides access to model settings + */ +export const ModelCard: FC = ({ + model, + modelType, + providerName, + isReadonlyProvider, + currentModelNames, +}) => { + const { enabled, name, removable, user_configured } = model; + const { + isOpen: dialogOpen, + setIsOpen: setDialogOpen, + dropdownOpen, + setDropdownOpen, + openDialogSafely, + isSavingModel, + handleToggleModelEnabledState, + handleRemoveModel, + handleResetModel, + handleSaveModel, + handleUpdateModel, + } = useModelDialogState({ + initialState: false, + modelType, + providerName, + }); + + const { 
setCodeCompletionModel } = useEventsBusForIDE(); + + const handleSetCompletionModelForIDE = useCallback(() => { + const formattedModelName = `${providerName}/${model.name}`; + setCodeCompletionModel(formattedModelName); + }, [model, providerName, setCodeCompletionModel]); + + const dropdownOptions = useMemo(() => { + const shouldOptionsBeDisabled = isReadonlyProvider || isSavingModel; + return [ + { + label: "Edit model's settings", + onClick: openDialogSafely, + visible: !shouldOptionsBeDisabled, + }, + { + label: enabled ? "Disable model" : "Enable model", + onClick: () => void handleToggleModelEnabledState(model), + visible: !shouldOptionsBeDisabled, + }, + { + label: "Reset model", + onClick: () => void handleResetModel(model), + visible: !removable && user_configured, + }, + { + label: "Remove model", + onClick: () => void handleRemoveModel({ model }), + visible: removable, + }, + { + label: "Use as completion model in IDE", + onClick: handleSetCompletionModelForIDE, + visible: modelType === "completion", + }, + ]; + }, [ + isReadonlyProvider, + isSavingModel, + enabled, + removable, + user_configured, + model, + modelType, + openDialogSafely, + handleToggleModelEnabledState, + handleResetModel, + handleRemoveModel, + handleSetCompletionModelForIDE, + ]); + + const dropdownOptionsCount = useMemo(() => { + return dropdownOptions.filter((option) => option.visible).length; + }, [dropdownOptions]); + + return ( + + {dialogOpen && ( + + )} + + + + + {name} + + + {enabled ? "Active" : "Inactive"} + + + + {dropdownOptionsCount > 0 && ( + + + + + + + + {dropdownOptions.map(({ label, visible, onClick }) => { + if (!visible) return null; + return ( + + {label} + + ); + })} + + + )} + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ProviderModelsList.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ProviderModelsList.tsx new file mode 100644 index 000000000..e36eccacb --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ProviderModelsList.tsx @@ -0,0 +1,126 @@ +import { useCallback, type FC } from "react"; +import { Flex, Heading, Separator, Text } from "@radix-ui/themes"; + +import type { ProviderFormProps } from "../ProviderForm"; + +import { Spinner } from "../../../../components/Spinner"; +import { ModelCard } from "./ModelCard"; +import { AddModelButton } from "./components"; + +import { useGetModelsByProviderNameQuery } from "../../../../hooks/useModelsQuery"; +import { ModelsResponse } from "../../../../services/refact"; + +export type ProviderModelsListProps = { + provider: ProviderFormProps["currentProvider"]; +}; + +const NoModelsText: FC = () => { + return ( + + No models available, but you can add one by clicking 'Add model' + + ); +}; + +export const ProviderModelsList: FC = ({ + provider, +}) => { + const { + data: modelsData, + isSuccess, + isLoading, + } = useGetModelsByProviderNameQuery({ + providerName: provider.name, + }); + + const getModelNames = useCallback((modelsData: ModelsResponse) => { + const currentChatModelNames = modelsData.chat_models.map((m) => m.name); + const currentCompletionModelNames = modelsData.completion_models.map( + (m) => m.name, + ); + + return { + currentChatModelNames, + currentCompletionModelNames, + }; + }, []); + + if (isLoading) return ; + + if (!isSuccess) return
<Text>Something went wrong :/</Text>
; + + const { chat_models, completion_models } = modelsData; + + const { currentChatModelNames, currentCompletionModelNames } = + getModelNames(modelsData); + + return ( + + + Models list + + + + Chat Models + + {chat_models.length > 0 ? ( + chat_models.map((m) => { + return ( + + ); + }) + ) : ( + + )} + {!provider.readonly && ( + + )} + {provider.supports_completion && ( + <> + + Completion Models + + {completion_models.length > 0 ? ( + completion_models.map((m) => { + return ( + + ); + }) + ) : ( + + )} + {!provider.readonly && ( + + )} + + )} + {/* TODO: do we want to expose embedding model configuration updates? */} + {/* + Embedding Model + +
{modelsData.embedding_model.name}
*/} +
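+ {/* If embedding configuration is exposed later, a minimal sketch could mirror
+ the sections above; the embedding_model props passed to ModelCard here are
+ assumptions for illustration, not the component's confirmed API:
+
+ <Heading as="h4" size="3">Embedding Model</Heading>
+ <ModelCard
+ model={modelsData.embedding_model}
+ modelType="embedding"
+ providerName={provider.name}
+ isReadonlyProvider={provider.readonly}
+ currentModelNames={[]}
+ />
+ */}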
+ ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/AddModelButton.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/AddModelButton.tsx new file mode 100644 index 000000000..d539aafea --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/AddModelButton.tsx @@ -0,0 +1,54 @@ +import type { FC } from "react"; +import { useModelDialogState } from "../hooks"; +import { ModelType } from "../../../../../services/refact"; +import { ModelCardPopup } from "./ModelCardPopup"; +import { Button } from "@radix-ui/themes"; + +export type AddModelButtonProps = { + modelType: ModelType; + providerName: string; + currentModelNames: string[]; +}; + +export const AddModelButton: FC = ({ + modelType, + providerName, + currentModelNames, +}) => { + const { + isOpen, + setIsOpen, + isSavingModel, + handleSaveModel, + handleUpdateModel, + } = useModelDialogState({ + modelType, + providerName, + initialState: false, + }); + + return ( + <> + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/CapabilityBadge.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/CapabilityBadge.tsx new file mode 100644 index 000000000..07cc633e1 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/CapabilityBadge.tsx @@ -0,0 +1,38 @@ +import { Badge } from "@radix-ui/themes"; +import { CheckIcon, Cross1Icon } from "@radix-ui/react-icons"; +import { FC } from "react"; + +type CapabilityBadgeProps = { + name: string; + enabled: boolean; + displayValue?: string | null; + onClick?: () => void; + interactive?: boolean; +}; + +/** + * Reusable component for model capability badges + */ +export const CapabilityBadge: FC = ({ + name, + enabled, + onClick, + displayValue = null, + interactive = true, +}) => { + const icon = enabled ? ( + + ) : ( + + ); + + return ( + + {name} {displayValue ? 
displayValue : icon} + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/FormField.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/FormField.tsx new file mode 100644 index 000000000..326afd8fe --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/FormField.tsx @@ -0,0 +1,53 @@ +import { Text, TextField } from "@radix-ui/themes"; +import { FC, ReactNode } from "react"; +import { Markdown } from "../../../../../components/Markdown"; + +type FormFieldProps = { + label: string; + value?: string; + placeholder?: string; + description?: string; + type?: TextField.RootProps["type"]; + isDisabled?: boolean; + max?: string; + onChange?: React.ChangeEventHandler; + children?: ReactNode; +}; + +/** + * Reusable form field component with consistent styling + */ +export const FormField: FC = ({ + label, + value, + placeholder, + description, + isDisabled, + type, + max, + onChange, + children, +}) => { + return ( + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/FormSelect.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/FormSelect.tsx new file mode 100644 index 000000000..4f4ad421a --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/FormSelect.tsx @@ -0,0 +1,69 @@ +import { Flex, Select, Text } from "@radix-ui/themes"; +import { ReactNode } from "react"; + +type FormSelectProps = { + label: string; + options?: OptionType[]; + optionTransformer?: (option: OptionType) => OptionType; + value: string; + placeholder?: string; + description?: string; + isDisabled?: boolean; + onValueChange?: (value: string) => void; + children?: ReactNode; +}; + +/** + * Type for the options of the form select component + */ +export type OptionType = string | null; + +/** + * Reusable form select component with consistent styling + */ +export function FormSelect({ + label, + options, + value, + placeholder, + description, + isDisabled, + onValueChange, + optionTransformer, +}: FormSelectProps) { + return ( + + + {label} + + {description && ( + + {description} + + )} + + + + {options?.map((option) => { + if (option !== null) { + return ( + + {optionTransformer ? 
optionTransformer(option) : option}{" "} + + ); + } + return ( + + None + + ); + })} + + + + ); +} diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/ModelCardPopup.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/ModelCardPopup.tsx new file mode 100644 index 000000000..216a2fa93 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/ModelCardPopup.tsx @@ -0,0 +1,512 @@ +import type { FC } from "react"; +import React, { + useState, + useEffect, + useCallback, + ChangeEvent, + useMemo, +} from "react"; +import isEqual from "lodash.isequal"; +import { Button, Dialog, Flex, Text } from "@radix-ui/themes"; + +import { + useGetCompletionModelFamiliesQuery, + useGetModelConfiguration, + useGetModelDefaults, +} from "../../../../../hooks/useModelsQuery"; + +import { FormField } from "./FormField"; +import { CapabilityBadge } from "./CapabilityBadge"; + +import type { + CodeChatModel, + CodeCompletionModel, + EmbeddingModel, + Model, + ModelType, + SimplifiedModel, + SupportsReasoningStyle, +} from "../../../../../services/refact"; + +import { extractHumanReadableReasoningType } from "../utils"; +import { useEffectOnce } from "../../../../../hooks"; +import { FormSelect } from "./FormSelect"; +import { Spinner } from "../../../../../components/Spinner"; + +const SUPPORTED_REASONING_STYLES: SupportsReasoningStyle[] = [ + "openai", + "deepseek", + "anthropic", + null, +]; + +export type ModelCardPopupProps = { + minifiedModel?: SimplifiedModel; + isOpen: boolean; + isSaving: boolean; + setIsOpen: (state: boolean) => void; + onSave: (model: Model) => Promise; + onUpdate: ({ + model, + oldModel, + }: { + model: Model; + oldModel: SimplifiedModel; + }) => Promise; + modelName: string; + modelType: ModelType; + providerName: string; + currentModelNames: string[]; + newModelCreation?: boolean; + isRemovable?: boolean; +}; + +export const ModelCardPopup: FC = ({ + isOpen, + isSaving, + setIsOpen, + onSave, + onUpdate, + modelName, + modelType, + providerName, + minifiedModel, + currentModelNames, + newModelCreation = false, + isRemovable = false, +}) => { + const { + data: configuredModelData, + isSuccess: _isConfiguredModelDataLoaded, + currentData: configuredModelCurrentData, + } = useGetModelConfiguration({ + modelName, + modelType, + providerName, + }); + + const { data: defaultModelData, isSuccess: isDefaultModelDataLoaded } = + useGetModelDefaults({ + modelType, + providerName, + }); + const [editedModelData, setEditedModelData] = useState( + configuredModelData, + ); + + const areDefaultsUnavailable = useMemo(() => { + const dataToCompare = { + ...editedModelData, + name: "", + }; + return isEqual(defaultModelData, dataToCompare); + }, [defaultModelData, editedModelData]); + + const isSavingDisabled = useMemo(() => { + if (!editedModelData?.name) { + return true; + } + const isNameTaken = currentModelNames.some( + (existingName) => + existingName === editedModelData.name && existingName !== modelName, + ); + // TODO: maybe we should move it out somewhere :P + const REQUIRED_FIELD_KEYS = ["tokenizer", "n_ctx"]; + + const someFieldsNotFilled = Object.entries(editedModelData).some( + ([key, value]) => { + if (REQUIRED_FIELD_KEYS.includes(key)) { + if (!value) return true; + } + + return false; + }, + ); + + if (isNameTaken) return true; + + return isEqual(configuredModelData, editedModelData) || someFieldsNotFilled; + }, [configuredModelData, editedModelData, 
currentModelNames, modelName]); + + useEffect(() => { + if (isOpen) { + if (configuredModelData) { + setEditedModelData((prev) => { + if (isEqual(prev, configuredModelCurrentData)) return prev; + return configuredModelData; + }); + return; + } + setEditedModelData(defaultModelData); + } + }, [ + isOpen, + configuredModelData, + configuredModelCurrentData, + defaultModelData, + newModelCreation, + modelType, + ]); + + useEffectOnce(() => { + return () => { + setEditedModelData(undefined); + }; + }); + + const handleSetDefaultModelData = useCallback(() => { + if (!isDefaultModelDataLoaded) return; + const updatedData = { + ...defaultModelData, + name: newModelCreation ? defaultModelData.name : modelName, + }; + setEditedModelData(updatedData); + }, [isDefaultModelDataLoaded, newModelCreation, modelName, defaultModelData]); + + const handleSave = useCallback(async () => { + if (!isOpen || !editedModelData) return; + + let isSuccess: boolean; + + if (minifiedModel && minifiedModel.name !== editedModelData.name) { + isSuccess = await onUpdate({ + model: editedModelData, + oldModel: minifiedModel, + }); + } else { + isSuccess = await onSave(editedModelData); + } + if (!isSuccess) return; + + setTimeout(() => setIsOpen(false), 0); + }, [isOpen, editedModelData, minifiedModel, setIsOpen, onSave, onUpdate]); + + const handleCancel = useCallback(() => { + setTimeout(() => setIsOpen(false), 0); + }, [setIsOpen]); + + const handleDialogChange = useCallback( + (open: boolean) => { + setIsOpen(open); + }, + [setIsOpen], + ); + + const getValueByType = (value: string, valueType: string) => { + if (valueType === "string") return value; + if (valueType === "number") return parseFloat(value); + return value; + }; + + const updateFieldByKey = useCallback( + (key: string, value: string | number) => { + if (!editedModelData) return; + setEditedModelData({ + ...editedModelData, + [key]: value, + }); + }, + [editedModelData], + ); + + const handleFieldValueChange = useCallback( + (e: ChangeEvent, field: string) => { + const valueType = typeof editedModelData?.[field as keyof Model]; + const value = getValueByType(e.target.value, valueType); + updateFieldByKey(field, value); + }, + [editedModelData, updateFieldByKey], + ); + + // Toggle capability value + const toggleCapability = (key: string) => { + if (!editedModelData) return; + + setEditedModelData({ + ...editedModelData, + [key]: !editedModelData[key as keyof typeof editedModelData], + }); + }; + + if (!configuredModelData && !newModelCreation) { + return null; + } + + return ( + + + Model Configuration + + {!newModelCreation + ?
`Make changes to ${modelName} (${modelType} model)` + : `Set up new model for ${providerName} (${modelType} model)`} + + + + handleFieldValueChange(e, "name")} + placeholder="Model name" + isDisabled={!newModelCreation && !isRemovable} + /> + {editedModelData?.type === "completion" && ( + + )} + + {editedModelData?.type === "chat" && ( + + )} + + {editedModelData?.type === "embedding" && ( + + )} + + + + + + + + + + + + ); +}; + +type CompletionModelFieldsProps = { + editedModelData: CodeCompletionModel; + handleFieldValueChange: ( + e: ChangeEvent, + field: string, + ) => void; + updateFieldByKey: (key: string, value: string | number) => void; +}; + +const CompletionModelFields: FC = ({ + editedModelData, + handleFieldValueChange, + updateFieldByKey, +}) => { + const { + data: modelFamiliesData, + isSuccess, + isLoading, + } = useGetCompletionModelFamiliesQuery(); + if (isLoading || !isSuccess) return ; + + const aggregatedModelFamilies = [...modelFamiliesData.model_families, null]; + return ( + <> + handleFieldValueChange(e, "n_ctx")} + placeholder="Context window size" + type="number" + /> + updateFieldByKey("model_family", value)} + options={aggregatedModelFamilies} + /> + + ); +}; + +// Chat model specific fields +type ChatModelFieldsProps = { + editedModelData?: CodeChatModel; + setEditedModelData: (data: Model) => void; + toggleCapability: (key: string) => void; + handleFieldValueChange: ( + e: ChangeEvent, + field: string, + ) => void; +}; + +const ChatModelFields: FC = ({ + editedModelData, + setEditedModelData, + toggleCapability, + handleFieldValueChange, +}) => { + const handleTemperatureChange = (e: React.ChangeEvent) => { + if (!editedModelData) return; + const value = parseFloat(e.target.value); + // Count digits excluding the decimal point; anything above 1 or longer than 8 digits resets to "1" + const digits = e.target.value + .split("") + .filter((s) => s !== "."); + + if (value > 1 || digits.length > 8) { + e.target.value = "1"; + } + + setEditedModelData({ + ...editedModelData, + type: "chat", + default_temperature: + e.target.value === "" ? null : Math.min(parseFloat(e.target.value), 1), + }); + }; + + const handleReasoningStyleChange = (value: string) => { + if (!editedModelData) return; + + setEditedModelData({ + ...editedModelData, + type: "chat", + supports_boost_reasoning: + value === "null" ? false : editedModelData.supports_boost_reasoning, + supports_reasoning: + value === "null" ?
null : (value as SupportsReasoningStyle), + }); + }; + + if (!editedModelData) return null; + + return ( + <> + handleFieldValueChange(e, "n_ctx")} + placeholder="Context window size" + type="number" + /> + handleFieldValueChange(e, "tokenizer")} + placeholder="Tokenizer name" + /> + + + + + + Capabilities + + + toggleCapability("supports_tools")} + /> + toggleCapability("supports_multimodality")} + /> + toggleCapability("supports_clicks")} + /> + toggleCapability("supports_agent")} + /> + {editedModelData.supports_reasoning && ( + toggleCapability("supports_boost_reasoning")} + /> + )} + + + + ); +}; + +// Embedding model specific fields +type EmbeddingModelFieldsProps = { + editedModelData: EmbeddingModel; + handleFieldValueChange: ( + e: ChangeEvent, + field: string, + ) => void; +}; + +const EmbeddingModelFields: FC = ({ + editedModelData, + handleFieldValueChange, +}) => { + return ( + <> + handleFieldValueChange(e, "embedding_size")} + placeholder="Embedding size" + type="number" + /> + handleFieldValueChange(e, "rejection_threshold")} + placeholder="Rejection threshold" + type="number" + /> + handleFieldValueChange(e, "embedding_batch")} + placeholder="Embedding batch" + type="number" + /> + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/index.ts b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/index.ts new file mode 100644 index 000000000..f53752ece --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/index.ts @@ -0,0 +1,4 @@ +export * from "./CapabilityBadge"; +export * from "./FormField"; +export * from "./ModelCardPopup"; +export * from "./AddModelButton"; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/hooks/index.ts b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/hooks/index.ts new file mode 100644 index 000000000..359b7d586 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/hooks/index.ts @@ -0,0 +1 @@ +export * from "./useModelDialogState"; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/hooks/useModelDialogState.ts b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/hooks/useModelDialogState.ts new file mode 100644 index 000000000..70eafb75c --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/hooks/useModelDialogState.ts @@ -0,0 +1,241 @@ +import { useState, useCallback } from "react"; + +import { + useDeleteModelMutation, + useGetLazyModelConfiguration, + useUpdateModelMutation, +} from "../../../../../hooks/useModelsQuery"; +import { useAppDispatch } from "../../../../../hooks"; + +import { setInformation } from "../../../../Errors/informationSlice"; +import { setError } from "../../../../Errors/errorsSlice"; + +import { modelsApi } from "../../../../../services/refact"; +import type { + Model, + ModelType, + SimplifiedModel, +} from "../../../../../services/refact"; + +/** + * Custom hook for managing model dialog state with body style reset functionality + */ +export const useModelDialogState = ({ + modelType, + providerName, + initialState = false, +}: { + modelType: ModelType; + providerName: string; + initialState?: boolean; +}) => { + const dispatch = useAppDispatch(); + + const [isOpen, setIsOpenState] = useState(initialState); + const [isSavingModel, setIsSavingModel] = useState(false); + const [isRemovingModel, 
setIsRemovingModel] = useState(false); + const [dropdownOpen, setDropdownOpen] = useState(false); + + const getModelData = useGetLazyModelConfiguration(); + const updateModel = useUpdateModelMutation(); + const deleteModel = useDeleteModelMutation(); + + const resetBodyStyles = useCallback(() => { + document.body.style.pointerEvents = ""; + }, []); + + const setIsOpen = useCallback( + (state: boolean) => { + setIsOpenState(state); + if (!state) { + resetBodyStyles(); + } + }, + [resetBodyStyles], + ); + + const openDialogSafely = useCallback(() => { + setDropdownOpen(false); + // Using a small timeout to avoid style conflicts + setTimeout(() => { + setIsOpenState(true); + }, 10); + }, []); + + const handleToggleModelEnabledState = useCallback( + async (model: SimplifiedModel) => { + setIsSavingModel(true); + const { data: modelData } = await getModelData({ + providerName, + modelName: model.name, + modelType: modelType, + }); + + if (!modelData) { + setIsSavingModel(false); + return; + } + + const enabled = modelData.enabled; + + const response = await updateModel({ + model: { + ...modelData, + enabled: !enabled, + }, + provider: providerName, + type: modelType, + }); + + if (response.error) { + dispatch( + setError( + `Error occurred on ${enabled ? "disabling" : "enabling"} ${ + model.name + } configuration. Check if your model configuration is correct`, + ), + ); + setIsSavingModel(false); + return; + } + + const actions = [ + setInformation( + `Model ${model.name} ${ + enabled ? "disabled" : "enabled" + } successfully!`, + ), + modelsApi.util.invalidateTags(["MODELS", "MODEL"]), + ]; + + actions.forEach((action) => dispatch(action)); + setIsSavingModel(false); + }, + [dispatch, getModelData, updateModel, modelType, providerName], + ); + + const handleRemoveModel = useCallback( + async ({ + model, + operationType = "remove", + isSilent = false, + }: { + model: SimplifiedModel; + operationType?: "remove" | "reset"; + isSilent?: boolean; + }) => { + setIsRemovingModel(true); + const response = await deleteModel({ + model: model.name, + provider: providerName, + type: modelType, + }); + + if (response.error) { + dispatch( + setError( + `Something went wrong during ${ + operationType === "remove" ? "removal" : "reset" + } of ${model.name} model. Please, try again`, + ), + ); + setIsRemovingModel(false); + return false; + } + + if (!isSilent) { + dispatch( + setInformation( + `Model ${model.name} was ${ + operationType === "remove" ? "removed" : "reset" + } successfully!`, + ), + ); + } + + dispatch(modelsApi.util.invalidateTags(["MODELS"])); + setIsRemovingModel(false); + return true; + }, + [dispatch, deleteModel, providerName, modelType], + ); + + const handleResetModel = useCallback( + async (model: SimplifiedModel) => { + const isSuccess = await handleRemoveModel({ + model, + operationType: "reset", + }); + if (isSuccess) { + dispatch(modelsApi.util.invalidateTags(["MODELS"])); + } + }, + [dispatch, handleRemoveModel], + ); + + const handleSaveModel = useCallback( + async (modelData: Model) => { + setIsSavingModel(true); + const response = await updateModel({ + model: modelData, + provider: providerName, + type: modelType, + }); + + if (response.error) { + dispatch( + setError( + `Something went wrong during update of ${modelData.name} model. 
Please, try again`, + ), + ); + setIsSavingModel(false); + return false; + } + const actions = [ + setInformation(`Model ${modelData.name} was updated successfully!`), + modelsApi.util.invalidateTags(["MODELS"]), + ]; + + actions.forEach((action) => dispatch(action)); + setIsSavingModel(false); + return true; + }, + [dispatch, setIsSavingModel, providerName, modelType, updateModel], + ); + + const handleUpdateModel = useCallback( + async ({ + model, + oldModel, + }: { + model: Model; + oldModel: SimplifiedModel; + }) => { + const removeResult = await handleRemoveModel({ + model: oldModel, + isSilent: true, + }); + if (!removeResult) return false; + const updateResult = await handleSaveModel(model); + return updateResult; + }, + [handleSaveModel, handleRemoveModel], + ); + + return { + isOpen, + isSavingModel, + isRemovingModel, + setIsRemovingModel, + setIsSavingModel, + setIsOpen, + dropdownOpen, + setDropdownOpen, + openDialogSafely, + resetBodyStyles, + handleSaveModel, + handleRemoveModel, + handleResetModel, + handleUpdateModel, + handleToggleModelEnabledState, + }; +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/index.ts b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/index.ts new file mode 100644 index 000000000..962a6b817 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/index.ts @@ -0,0 +1 @@ +export { ProviderModelsList } from "./ProviderModelsList"; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/utils/extractHumanReadableReasoningType.ts b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/utils/extractHumanReadableReasoningType.ts new file mode 100644 index 000000000..e1b2020aa --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/utils/extractHumanReadableReasoningType.ts @@ -0,0 +1,26 @@ +import { SupportsReasoningStyle } from "../../../../../services/refact"; +import { BEAUTIFUL_PROVIDER_NAMES } from "../../../constants"; + +export function isSupportsReasoningStyle( + data: string | null, +): data is SupportsReasoningStyle { + return ( + data === "openai" || + data === "anthropic" || + data === "deepseek" || + data === null + ); +} + +export function extractHumanReadableReasoningType( + reasoningType: string | null, +) { + if (!isSupportsReasoningStyle(reasoningType)) return null; + if (!reasoningType) return null; + + const maybeReadableReasoningType = BEAUTIFUL_PROVIDER_NAMES[reasoningType]; + + return maybeReadableReasoningType + ? 
maybeReadableReasoningType + : reasoningType; +} diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/utils/index.ts b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/utils/index.ts new file mode 100644 index 000000000..94393d143 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/utils/index.ts @@ -0,0 +1 @@ +export * from "./extractHumanReadableReasoningType"; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/index.ts b/refact-agent/gui/src/features/Providers/ProviderForm/index.ts new file mode 100644 index 000000000..a7d16ae6c --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/index.ts @@ -0,0 +1 @@ +export { ProviderForm } from "./ProviderForm"; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/useProviderForm.ts b/refact-agent/gui/src/features/Providers/ProviderForm/useProviderForm.ts new file mode 100644 index 000000000..ff4ed57ad --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/useProviderForm.ts @@ -0,0 +1,54 @@ +import isEqual from "lodash.isequal"; +import { useCallback, useEffect, useMemo, useState } from "react"; +import type { Provider } from "../../../services/refact"; +import { + useGetConfiguredProvidersQuery, + useGetProviderQuery, +} from "../../../hooks/useProvidersQuery"; + +export function useProviderForm({ providerName }: { providerName: string }) { + const { data: detailedProvider, isSuccess: isProviderLoadedSuccessfully } = + useGetProviderQuery({ + providerName: providerName, + }); + const { data: configuredProviders } = useGetConfiguredProvidersQuery(); + + const [formValues, setFormValues] = useState(null); + const [areShowingExtraFields, setAreShowingExtraFields] = useState(false); + + useEffect(() => { + if (detailedProvider) { + setFormValues(detailedProvider); + } + }, [detailedProvider]); + + const shouldSaveButtonBeDisabled = useMemo(() => { + if (!detailedProvider) return true; + + const isProviderConfigured = configuredProviders?.providers.some( + (p) => p.name === providerName, + ); + if (!isProviderConfigured) return false; + + return detailedProvider.readonly || isEqual(formValues, detailedProvider); + }, [configuredProviders, detailedProvider, formValues, providerName]); + + const handleFormValuesChange = useCallback( + (updatedProviderData: Provider) => { + setFormValues(updatedProviderData); + }, + [], + ); + + return { + formValues, + setFormValues, + areShowingExtraFields, + setAreShowingExtraFields, + shouldSaveButtonBeDisabled, + handleFormValuesChange, + configuredProviders, + detailedProvider, + isProviderLoadedSuccessfully, + }; +} diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/utils.ts b/refact-agent/gui/src/features/Providers/ProviderForm/utils.ts new file mode 100644 index 000000000..3157bd6a4 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/utils.ts @@ -0,0 +1,40 @@ +import type { Provider } from "../../../services/refact"; + +export type AggregatedProviderFields = { + importantFields: Record; + extraFields: Record; +}; + +const EXTRA_FIELDS_KEYS = [ + "embedding_endpoint", + "completion_endpoint", + "chat_endpoint", + "tokenizer_api_key", +]; +const HIDDEN_FIELDS_KEYS = [ + "name", + "readonly", + "enabled", + "supports_completion", +]; + +export function aggregateProviderFields(providerData: Provider) { + return Object.entries(providerData).reduce( + (acc, [key, value]) => { + const stringValue = value; + + if 
(HIDDEN_FIELDS_KEYS.some((hiddenField) => hiddenField === key)) { + return acc; + } + + if (EXTRA_FIELDS_KEYS.some((extraField) => extraField === key)) { + acc.extraFields[key] = stringValue; + } else { + acc.importantFields[key] = stringValue; + } + + return acc; + }, + { importantFields: {}, extraFields: {} }, + ); +} diff --git a/refact-agent/gui/src/features/Providers/ProviderPreview/ProviderPreview.tsx b/refact-agent/gui/src/features/Providers/ProviderPreview/ProviderPreview.tsx new file mode 100644 index 000000000..0a4be0bc5 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderPreview/ProviderPreview.tsx @@ -0,0 +1,68 @@ +import React from "react"; +import { Flex, Heading } from "@radix-ui/themes"; + +import { ProviderForm } from "../ProviderForm"; + +import { useProviderPreview } from "./useProviderPreview"; +import { getProviderName } from "../getProviderName"; + +import type { SimplifiedProvider } from "../../../services/refact"; +import { DeletePopover } from "../../../components/DeletePopover"; + +export type ProviderPreviewProps = { + configuredProviders: SimplifiedProvider< + "name" | "enabled" | "readonly" | "supports_completion" + >[]; + currentProvider: SimplifiedProvider< + "name" | "enabled" | "readonly" | "supports_completion" + >; + handleSetCurrentProvider: ( + provider: SimplifiedProvider< + "name" | "enabled" | "readonly" | "supports_completion" + > | null, + ) => void; +}; + +export const ProviderPreview: React.FC = ({ + configuredProviders, + currentProvider, + handleSetCurrentProvider, +}) => { + const { + handleDiscardChanges, + handleSaveChanges, + handleDeleteProvider, + isDeletingProvider, + isSavingProvider, + } = useProviderPreview(handleSetCurrentProvider); + + return ( + + + + {getProviderName(currentProvider)} Configuration + + + void handleDeleteProvider(providerName) + } + /> + + + void handleSaveChanges(updatedProviderData) + } + isSaving={isSavingProvider} + isProviderConfigured={configuredProviders.some( + (p) => p.name === currentProvider.name, + )} + handleDiscardChanges={handleDiscardChanges} + /> + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderPreview/index.ts b/refact-agent/gui/src/features/Providers/ProviderPreview/index.ts new file mode 100644 index 000000000..cd2ff1f29 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderPreview/index.ts @@ -0,0 +1 @@ +export { ProviderPreview } from "./ProviderPreview"; diff --git a/refact-agent/gui/src/features/Providers/ProviderPreview/useProviderPreview.ts b/refact-agent/gui/src/features/Providers/ProviderPreview/useProviderPreview.ts new file mode 100644 index 000000000..1814e85be --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderPreview/useProviderPreview.ts @@ -0,0 +1,94 @@ +import { useCallback, useState } from "react"; + +import { useAppDispatch } from "../../../hooks"; +import { + useDeleteProviderMutation, + useUpdateProviderMutation, +} from "../../../hooks/useProvidersQuery"; + +import { setInformation } from "../../Errors/informationSlice"; +import { providersApi } from "../../../services/refact"; + +import { getProviderName } from "../getProviderName"; + +import type { Provider, SimplifiedProvider } from "../../../services/refact"; + +export function useProviderPreview( + handleSetCurrentProvider: ( + provider: SimplifiedProvider< + "name" | "enabled" | "readonly" | "supports_completion" + > | null, + ) => void, +) { + const dispatch = useAppDispatch(); + + const [isSavingProvider, setIsSavingProvider] = useState(false); + 
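// Local flags back the Save/Delete button spinners; the provider data itself +
// is refetched via RTK Query tag invalidation / resetApiState further below. +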
const [isDeletingProvider, setIsDeletingProvider] = useState(false); + + const updateProvider = useUpdateProviderMutation(); + const deleteProvider = useDeleteProviderMutation(); + + const handleSaveChanges = useCallback( + async (updatedProviderData: Provider) => { + setIsSavingProvider(true); + const response = await updateProvider(updatedProviderData); + if (response.error) { + setIsSavingProvider(false); + return; + } + const actions = [ + setInformation( + `Provider ${getProviderName( + updatedProviderData, + )} updated successfully`, + ), + providersApi.util.invalidateTags([ + "PROVIDER", + { type: "CONFIGURED_PROVIDERS", id: "LIST" }, + ]), + ]; + actions.forEach((action) => dispatch(action)); + setIsSavingProvider(false); + }, + [dispatch, updateProvider], + ); + + const handleDeleteProvider = useCallback( + async (providerName: string) => { + setIsDeletingProvider(true); + const response = await deleteProvider(providerName); + + if (response.error) { + setIsDeletingProvider(false); + return; + } + + const actions = [ + setInformation( + `${getProviderName( + providerName, + )}'s Provider configuration was deleted successfully`, + ), + providersApi.util.resetApiState(), + ]; + + actions.forEach((action) => dispatch(action)); + handleSetCurrentProvider(null); + setIsDeletingProvider(false); + }, + [dispatch, deleteProvider, handleSetCurrentProvider], + ); + + const handleDiscardChanges = useCallback(() => { + handleSetCurrentProvider(null); + }, [handleSetCurrentProvider]); + + return { + updateProvider, + handleDeleteProvider, + handleDiscardChanges, + handleSaveChanges, + isSavingProvider, + isDeletingProvider, + }; +} diff --git a/refact-agent/gui/src/features/Providers/ProviderUpdateContext.tsx b/refact-agent/gui/src/features/Providers/ProviderUpdateContext.tsx new file mode 100644 index 000000000..348b01cb0 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderUpdateContext.tsx @@ -0,0 +1,57 @@ +import React, { + createContext, + useContext, + useState, + ReactNode, + useMemo, + useCallback, +} from "react"; + +type ProviderUpdateState = { + updatingProviders: Record; + setProviderUpdating: (providerName: string, isUpdating: boolean) => void; +}; + +const ProviderUpdateContext = createContext( + undefined, +); + +export const ProviderUpdateProvider: React.FC<{ children: ReactNode }> = ({ + children, +}) => { + const [updatingProviders, setUpdatingProviders] = useState< + Record + >({}); + + const setProviderUpdating = useCallback( + (providerName: string, isUpdating: boolean) => { + setUpdatingProviders((prev) => ({ + ...prev, + [providerName]: isUpdating, + })); + }, + [], + ); + + const value = useMemo( + () => ({ updatingProviders, setProviderUpdating }), + [updatingProviders, setProviderUpdating], + ); + + return ( + + {children} + + ); +}; + +// eslint-disable-next-line react-refresh/only-export-components +export const useProviderUpdateContext = (): ProviderUpdateState => { + const context = useContext(ProviderUpdateContext); + if (context === undefined) { + throw new Error( + "useProviderUpdateContext must be used within a ProviderUpdateProvider", + ); + } + return context; +}; diff --git a/refact-agent/gui/src/features/Providers/Providers.tsx b/refact-agent/gui/src/features/Providers/Providers.tsx new file mode 100644 index 000000000..5b6b83413 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/Providers.tsx @@ -0,0 +1,68 @@ +import React from "react"; +import { Flex, Button } from "@radix-ui/themes"; +import { ArrowLeftIcon } from 
"@radix-ui/react-icons"; + +import { ScrollArea } from "../../components/ScrollArea"; +import { PageWrapper } from "../../components/PageWrapper"; +import { Spinner } from "../../components/Spinner"; +import { ProvidersView } from "./ProvidersView"; +import { ProviderUpdateProvider } from "./ProviderUpdateContext"; + +import { useGetConfiguredProvidersQuery } from "../../hooks/useProvidersQuery"; + +import type { Config } from "../Config/configSlice"; + +export type ProvidersProps = { + backFromProviders: () => void; + host: Config["host"]; + tabbed: Config["tabbed"]; +}; +export const Providers: React.FC = ({ + backFromProviders, + host, + tabbed, +}) => { + const { data: configuredProvidersData, isSuccess } = + useGetConfiguredProvidersQuery(); + + if (!isSuccess) return ; + return ( + + {host === "vscode" && !tabbed ? ( + + + + ) : ( + + )} + + + + + + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProvidersView/ConfiguredProvidersView.tsx b/refact-agent/gui/src/features/Providers/ProvidersView/ConfiguredProvidersView.tsx new file mode 100644 index 000000000..d142abe7f --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProvidersView/ConfiguredProvidersView.tsx @@ -0,0 +1,81 @@ +import React from "react"; + +import { Button, Flex, Heading, Select, Text } from "@radix-ui/themes"; +import { ProviderCard } from "../ProviderCard/ProviderCard"; + +import type { ConfiguredProvidersResponse } from "../../../services/refact"; +import { getProviderName } from "../getProviderName"; +import { useGetConfiguredProvidersView } from "./useConfiguredProvidersView"; + +export type ConfiguredProvidersViewProps = { + configuredProviders: ConfiguredProvidersResponse["providers"]; + handleSetCurrentProvider: ( + provider: ConfiguredProvidersResponse["providers"][number], + ) => void; +}; + +export const ConfiguredProvidersView: React.FC< + ConfiguredProvidersViewProps +> = ({ configuredProviders, handleSetCurrentProvider }) => { + const { + handleAddNewProvider, + handlePotentialCurrentProvider, + notConfiguredProviderTemplates, + sortedConfiguredProviders, + potentialCurrentProvider, + } = useGetConfiguredProvidersView({ + configuredProviders, + handleSetCurrentProvider, + }); + + return ( + + + + + Configured Providers + + + Here you can navigate through the list of configured and available + providers + + + {sortedConfiguredProviders.map((provider, idx) => ( + + ))} + + {notConfiguredProviderTemplates.length > 0 && ( + + + Add new provider + + + + + {notConfiguredProviderTemplates.map((provider) => { + return ( + + {getProviderName(provider)} + + ); + })} + + + {potentialCurrentProvider && ( + + )} + + )} + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProvidersView/ProvidersView.module.css b/refact-agent/gui/src/features/Providers/ProvidersView/ProvidersView.module.css new file mode 100644 index 000000000..42ef91e28 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProvidersView/ProvidersView.module.css @@ -0,0 +1,14 @@ +.popup { + position: fixed; + max-width: max-content; + width: 80%; + left: 50%; + transform: translateX(-50%); + background-color: var(--accent-3); + bottom: 65px; +} + +/* styles for IDEs (padding for pages varies on config.host (vscode, jetbrains, web) */ +.popup_ide { + width: calc(100vw - var(--space-2) * 2); +} diff --git a/refact-agent/gui/src/features/Providers/ProvidersView/ProvidersView.tsx b/refact-agent/gui/src/features/Providers/ProvidersView/ProvidersView.tsx new file mode 100644 index 000000000..ec24f3fc7 --- /dev/null 
+++ b/refact-agent/gui/src/features/Providers/ProvidersView/ProvidersView.tsx @@ -0,0 +1,94 @@ +import React, { useCallback, useState } from "react"; +import { Flex } from "@radix-ui/themes"; + +import { ConfiguredProvidersView } from "./ConfiguredProvidersView"; + +import type { + ConfiguredProvidersResponse, + SimplifiedProvider, +} from "../../../services/refact"; +import { ProviderPreview } from "../ProviderPreview"; +import { + ErrorCallout, + InformationCallout, +} from "../../../components/Callout/Callout"; +import classNames from "classnames"; +import { useAppDispatch, useAppSelector } from "../../../hooks"; +import { clearError, getErrorMessage } from "../../Errors/errorsSlice"; +import { + clearInformation, + getInformationMessage, +} from "../../Errors/informationSlice"; + +import styles from "./ProvidersView.module.css"; +import { selectConfig } from "../../Config/configSlice"; + +export type ProvidersViewProps = { + configuredProviders: ConfiguredProvidersResponse["providers"]; +}; + +export const ProvidersView: React.FC = ({ + configuredProviders, +}) => { + const dispatch = useAppDispatch(); + + const currentHost = useAppSelector(selectConfig).host; + const globalError = useAppSelector(getErrorMessage); + const information = useAppSelector(getInformationMessage); + + const [currentProvider, setCurrentProvider] = useState | null>(null); + const handleSetCurrentProvider = useCallback( + ( + provider: SimplifiedProvider< + "name" | "enabled" | "readonly" | "supports_completion" + > | null, + ) => { + setCurrentProvider(provider); + }, + [], + ); + + return ( + + {!currentProvider && ( + + )} + {currentProvider && ( + + )} + {information && ( + dispatch(clearInformation())} + className={classNames(styles.popup, { + [styles.popup_ide]: currentHost !== "web", + })} + > + {information} + + )} + {globalError && ( + dispatch(clearError())} + className={classNames(styles.popup, { + [styles.popup_ide]: currentHost !== "web", + })} + > + {globalError} + + )} + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProvidersView/index.ts b/refact-agent/gui/src/features/Providers/ProvidersView/index.ts new file mode 100644 index 000000000..c17996029 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProvidersView/index.ts @@ -0,0 +1,2 @@ +export { ProvidersView } from "./ProvidersView"; +export { ConfiguredProvidersView } from "./ConfiguredProvidersView"; diff --git a/refact-agent/gui/src/features/Providers/ProvidersView/useConfiguredProvidersView.tsx b/refact-agent/gui/src/features/Providers/ProvidersView/useConfiguredProvidersView.tsx new file mode 100644 index 000000000..ade797242 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProvidersView/useConfiguredProvidersView.tsx @@ -0,0 +1,84 @@ +import { useCallback, useEffect, useMemo, useState } from "react"; +import type { SimplifiedProvider } from "../../../services/refact"; +import { useGetProviderTemplatesQuery } from "../../../hooks/useProvidersQuery"; +import { ConfiguredProvidersViewProps } from "./ConfiguredProvidersView"; + +export function useGetConfiguredProvidersView({ + configuredProviders, + handleSetCurrentProvider, +}: { + configuredProviders: ConfiguredProvidersViewProps["configuredProviders"]; + handleSetCurrentProvider: ConfiguredProvidersViewProps["handleSetCurrentProvider"]; +}) { + const { data: providerTemplatesData } = useGetProviderTemplatesQuery(); + + const notConfiguredProviderTemplates = useMemo(() => { + return providerTemplatesData + ? 
providerTemplatesData.provider_templates.reduce< + SimplifiedProvider<"name">[] + >((acc, provider) => { + if (!configuredProviders.some((p) => p.name === provider.name)) + acc.push(provider); + return acc; + }, []) + : []; + }, [configuredProviders, providerTemplatesData]); + + const [potentialCurrentProvider, setPotentialCurrentProvider] = useState< + SimplifiedProvider<"name"> | undefined + >(notConfiguredProviderTemplates[0] || undefined); + + const sortedConfiguredProviders = useMemo(() => { + return [...configuredProviders].sort((a, b) => { + const getPriority = (provider: { name: string }) => { + if ( + provider.name === "refact" || + provider.name === "refact_self_hosted" + ) + return 0; + if (provider.name === "custom") return 2; + return 1; + }; + + const priorityA = getPriority(a); + const priorityB = getPriority(b); + + if (priorityA !== priorityB) { + return priorityA - priorityB; + } + + return a.name.localeCompare(b.name); + }); + }, [configuredProviders]); + + const handlePotentialCurrentProvider = useCallback((value: string) => { + setPotentialCurrentProvider({ + name: value, + }); + }, []); + + const handleAddNewProvider = useCallback(() => { + if (!potentialCurrentProvider) return; + + handleSetCurrentProvider({ + name: potentialCurrentProvider.name, + enabled: true, + readonly: false, + supports_completion: false, + }); + }, [handleSetCurrentProvider, potentialCurrentProvider]); + + useEffect(() => { + if (notConfiguredProviderTemplates.length > 0) { + setPotentialCurrentProvider(notConfiguredProviderTemplates[0]); + } + }, [notConfiguredProviderTemplates]); + + return { + handlePotentialCurrentProvider, + handleAddNewProvider, + sortedConfiguredProviders, + notConfiguredProviderTemplates, + potentialCurrentProvider, + }; +} diff --git a/refact-agent/gui/src/features/Providers/constants.ts b/refact-agent/gui/src/features/Providers/constants.ts new file mode 100644 index 000000000..1ac6e76b1 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/constants.ts @@ -0,0 +1,14 @@ +export const BEAUTIFUL_PROVIDER_NAMES: Record = { + refact: "Refact Cloud", + refact_self_hosted: "Refact Self-Hosted", + openai: "OpenAI", + openrouter: "OpenRouter", + groq: "Groq", // not sure about this one + anthropic: "Anthropic", + deepseek: "DeepSeek", + google_gemini: "Google Gemini", + ollama: "Ollama", + lmstudio: "LM Studio", + xai: "xAI", + custom: "Custom Provider", +}; diff --git a/refact-agent/gui/src/features/Providers/getProviderName.ts b/refact-agent/gui/src/features/Providers/getProviderName.ts new file mode 100644 index 000000000..a299e27fd --- /dev/null +++ b/refact-agent/gui/src/features/Providers/getProviderName.ts @@ -0,0 +1,10 @@ +import type { SimplifiedProvider } from "../../services/refact"; +import { BEAUTIFUL_PROVIDER_NAMES } from "./constants"; + +export function getProviderName(provider: SimplifiedProvider | string): string { + if (typeof provider === "string") return BEAUTIFUL_PROVIDER_NAMES[provider]; + const maybeName = provider.name; + if (!maybeName) return "Unknown Provider"; // TODO: throw error or think through it more + const beautyName = BEAUTIFUL_PROVIDER_NAMES[maybeName] as string | undefined; + return beautyName ? 
beautyName : maybeName; +} diff --git a/refact-agent/gui/src/features/Providers/icons/Anthropic.tsx b/refact-agent/gui/src/features/Providers/icons/Anthropic.tsx new file mode 100644 index 000000000..db5806577 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/Anthropic.tsx @@ -0,0 +1,20 @@ +import { FC, SVGProps } from "react"; + +export const AnthropicIcon: FC> = (props) => { + return ( + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/Custom.tsx b/refact-agent/gui/src/features/Providers/icons/Custom.tsx new file mode 100644 index 000000000..1f29774f6 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/Custom.tsx @@ -0,0 +1,17 @@ +import { FC, SVGProps } from "react"; + +export const CustomIcon: FC> = (props) => { + return ( + + + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/DeepSeek.tsx b/refact-agent/gui/src/features/Providers/icons/DeepSeek.tsx new file mode 100644 index 000000000..73416f3fd --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/DeepSeek.tsx @@ -0,0 +1,17 @@ +import { FC, SVGProps } from "react"; + +export const DeepSeekIcon: FC> = (props) => { + return ( + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/Gemini.tsx b/refact-agent/gui/src/features/Providers/icons/Gemini.tsx new file mode 100644 index 000000000..b2a8e7a44 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/Gemini.tsx @@ -0,0 +1,32 @@ +import { FC, SVGProps } from "react"; + +export const GeminiIcon: FC> = (props) => { + return ( + + + + + + + + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/Groq.tsx b/refact-agent/gui/src/features/Providers/icons/Groq.tsx new file mode 100644 index 000000000..73a7a2f23 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/Groq.tsx @@ -0,0 +1,16 @@ +import { FC, SVGProps } from "react"; + +export const GroqIcon: FC> = (props) => { + return ( + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/LMStudio.tsx b/refact-agent/gui/src/features/Providers/icons/LMStudio.tsx new file mode 100644 index 000000000..f9f18c573 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/LMStudio.tsx @@ -0,0 +1,25 @@ +import { FC, SVGProps } from "react"; + +export const LMStudioIcon: FC> = (props) => { + return ( + + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/Ollama.tsx b/refact-agent/gui/src/features/Providers/icons/Ollama.tsx new file mode 100644 index 000000000..8dcd31b6b --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/Ollama.tsx @@ -0,0 +1,20 @@ +import { FC, SVGProps } from "react"; + +export const OllamaIcon: FC> = (props) => { + return ( + + + + + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/OpenAI.tsx b/refact-agent/gui/src/features/Providers/icons/OpenAI.tsx new file mode 100644 index 000000000..3c1c670e6 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/OpenAI.tsx @@ -0,0 +1,17 @@ +import { FC, SVGProps } from "react"; + +export const OpenAIIcon: FC> = (props) => { + return ( + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/OpenRouter.tsx b/refact-agent/gui/src/features/Providers/icons/OpenRouter.tsx new file mode 100644 index 000000000..12af65160 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/OpenRouter.tsx @@ -0,0 +1,28 @@ +import { FC, SVGProps } from "react"; + +export const OpenRouterIcon: FC> = (props) => { + return ( + + + + + + + + + ); +}; diff 
--git a/refact-agent/gui/src/features/Providers/icons/Refact.tsx b/refact-agent/gui/src/features/Providers/icons/Refact.tsx new file mode 100644 index 000000000..c2927fce4 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/Refact.tsx @@ -0,0 +1,24 @@ +import { FC, SVGProps } from "react"; + +export const RefactIcon: FC> = (props) => { + return ( + + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/Xai.tsx b/refact-agent/gui/src/features/Providers/icons/Xai.tsx new file mode 100644 index 000000000..9340fd6e5 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/Xai.tsx @@ -0,0 +1,16 @@ +import { FC, SVGProps } from "react"; + +export const XaiIcon: FC> = (props) => { + return ( + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/iconsMap.tsx b/refact-agent/gui/src/features/Providers/icons/iconsMap.tsx new file mode 100644 index 000000000..922703e57 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/iconsMap.tsx @@ -0,0 +1,26 @@ +import { AnthropicIcon } from "./Anthropic"; +import { CustomIcon } from "./Custom"; +import { DeepSeekIcon } from "./DeepSeek"; +import { GeminiIcon } from "./Gemini"; +import { GroqIcon } from "./Groq"; +import { LMStudioIcon } from "./LMStudio"; +import { OllamaIcon } from "./Ollama"; +import { OpenAIIcon } from "./OpenAI"; +import { OpenRouterIcon } from "./OpenRouter"; +import { RefactIcon } from "./Refact"; +import { XaiIcon } from "./Xai"; + +export const iconsMap: Record = { + refact: , + refact_self_hosted: , + openai: , + anthropic: , + google_gemini: , + openrouter: , + deepseek: , + groq: , + ollama: , + lmstudio: , + xai: , + custom: , +}; diff --git a/refact-agent/gui/src/features/Providers/index.ts b/refact-agent/gui/src/features/Providers/index.ts new file mode 100644 index 000000000..68b19e53b --- /dev/null +++ b/refact-agent/gui/src/features/Providers/index.ts @@ -0,0 +1 @@ +export { Providers } from "./Providers"; diff --git a/refact-agent/gui/src/features/Providers/useUpdateProvider.ts b/refact-agent/gui/src/features/Providers/useUpdateProvider.ts new file mode 100644 index 000000000..52d42b141 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/useUpdateProvider.ts @@ -0,0 +1,80 @@ +import { useCallback } from "react"; + +import { providersApi } from "../../services/refact"; +import { useAppDispatch } from "../../hooks"; + +import { getProviderName } from "./getProviderName"; +import { setError } from "../../features/Errors/errorsSlice"; +import { useProviderUpdateContext } from "./ProviderUpdateContext"; + +import type { ProviderCardProps } from "./ProviderCard"; + +export const useUpdateProvider = ({ + provider, +}: { + provider: ProviderCardProps["provider"]; +}) => { + const dispatch = useAppDispatch(); + const { updatingProviders, setProviderUpdating } = useProviderUpdateContext(); + + const [getProviderData] = providersApi.useLazyGetProviderQuery(); + const [saveProviderData] = providersApi.useUpdateProviderMutation(); + + // Use the provider name as the key to track state + // then get updating state from context + const providerKey = provider.name; + const isUpdatingEnabledState = updatingProviders[providerKey] || false; + + const updateProviderEnabledState = useCallback(async () => { + setProviderUpdating(providerKey, true); + + const { data: providerData } = await getProviderData({ + providerName: provider.name, + }); + + if (!providerData) { + setProviderUpdating(providerKey, false); + return; + } + + const enabled = providerData.enabled; + 
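+ // Negate the flag as just fetched from the server, not any cached card
+ // state, so a stale UI value cannot flip the provider the wrong way.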
+ const response = await saveProviderData({ + ...providerData, + enabled: !enabled, + }); + + if (response.error) { + dispatch( + setError( + `Error occurred on updating ${getProviderName( + provider, + )} configuration. Check if your provider configuration is correct`, + ), + ); + setProviderUpdating(providerKey, false); + return; + } + + dispatch( + providersApi.util.invalidateTags([ + { type: "CONFIGURED_PROVIDERS", id: "LIST" }, + ]), + ); + setTimeout(() => { + setProviderUpdating(providerKey, false); + }, 500); + }, [ + dispatch, + getProviderData, + saveProviderData, + provider, + providerKey, + setProviderUpdating, + ]); + + return { + updateProviderEnabledState, + isUpdatingEnabledState, + }; +}; diff --git a/refact-agent/gui/src/hooks/useCanUseTools.ts b/refact-agent/gui/src/hooks/useCanUseTools.ts index 7891cc2fa..e781175bd 100644 --- a/refact-agent/gui/src/hooks/useCanUseTools.ts +++ b/refact-agent/gui/src/hooks/useCanUseTools.ts @@ -3,7 +3,7 @@ import { useAppSelector } from "./useAppSelector"; import { useGetToolsQuery } from "./useGetToolsQuery"; import { useGetCapsQuery } from "./useGetCapsQuery"; import { selectModel } from "../features/Chat/Thread/selectors"; -import { CodeChatModel } from "../services/refact/caps"; +import { CodeChatModel } from "../services/refact/models"; export const useCanUseTools = () => { const capsRequest = useGetCapsQuery(); @@ -19,10 +19,10 @@ export const useCanUseTools = () => { if (!capsRequest.data) return false; if (!toolsRequest.data) return false; if (toolsRequest.data.length === 0) return false; - const modelName = chatModel || capsRequest.data.code_chat_default_model; + const modelName = chatModel || capsRequest.data.chat_default_model; - if (!(modelName in capsRequest.data.code_chat_models)) return false; - const model: CodeChatModel = capsRequest.data.code_chat_models[modelName]; + if (!(modelName in capsRequest.data.chat_models)) return false; + const model: CodeChatModel = capsRequest.data.chat_models[modelName]; if ("supports_tools" in model && model.supports_tools) return true; return false; }, [capsRequest.data, toolsRequest.data, chatModel]); diff --git a/refact-agent/gui/src/hooks/useCapsForToolUse.ts b/refact-agent/gui/src/hooks/useCapsForToolUse.ts index e97d2294d..0a6046f9a 100644 --- a/refact-agent/gui/src/hooks/useCapsForToolUse.ts +++ b/refact-agent/gui/src/hooks/useCapsForToolUse.ts @@ -30,7 +30,7 @@ export function useCapsForToolUse() { const toolUse = useAppSelector(selectThreadToolUse); const dispatch = useAppDispatch(); - const defaultCap = caps.data?.code_chat_default_model ?? ""; + const defaultCap = caps.data?.chat_default_model ?? ""; const selectedModel = useAppSelector(getSelectedChatModel); @@ -38,40 +38,40 @@ export function useCapsForToolUse() { const setCapModel = useCallback( (value: string) => { - const model = caps.data?.code_chat_default_model === value ? "" : value; + const model = caps.data?.chat_default_model === value ? "" : value; const action = setChatModel(model); dispatch(action); const tokens = - caps.data?.code_chat_models[value]?.n_ctx ?? DEFAULT_MAX_NEW_TOKENS; + caps.data?.chat_models[value]?.n_ctx ?? 
DEFAULT_MAX_NEW_TOKENS; dispatch(setMaxNewTokens(tokens)); }, - [caps.data?.code_chat_default_model, caps.data?.code_chat_models, dispatch], + [caps.data?.chat_default_model, caps.data?.chat_models, dispatch], ); const isMultimodalitySupportedForCurrentModel = useMemo(() => { - const models = caps.data?.code_chat_models; + const models = caps.data?.chat_models; const item = models?.[currentModel]; if (!item) return false; if (!item.supports_multimodality) return false; return true; - }, [caps.data?.code_chat_models, currentModel]); + }, [caps.data?.chat_models, currentModel]); const modelsSupportingTools = useMemo(() => { - const models = caps.data?.code_chat_models ?? {}; + const models = caps.data?.chat_models ?? {}; return Object.entries(models) .filter(([_, value]) => value.supports_tools) .map(([key]) => key); - }, [caps.data?.code_chat_models]); + }, [caps.data?.chat_models]); const modelsSupportingAgent = useMemo(() => { - const models = caps.data?.code_chat_models ?? {}; + const models = caps.data?.chat_models ?? {}; return Object.entries(models) .filter(([_, value]) => value.supports_agent) .map(([key]) => key); - }, [caps.data?.code_chat_models]); + }, [caps.data?.chat_models]); const usableModels = useMemo(() => { - const models = caps.data?.code_chat_models ?? {}; + const models = caps.data?.chat_models ?? {}; const items = Object.entries(models).reduce( (acc, [key, value]) => { if (toolUse === "explore" && value.supports_tools) { @@ -84,7 +84,7 @@ export function useCapsForToolUse() { [], ); return items; - }, [caps.data?.code_chat_models, toolUse]); + }, [caps.data?.chat_models, toolUse]); const usableModelsForPlan = useMemo(() => { // TODO: keep filtering logic for the future BYOK + Cloud (to show different providers) diff --git a/refact-agent/gui/src/hooks/useEventBusForIDE.ts b/refact-agent/gui/src/hooks/useEventBusForIDE.ts index a45290a9d..40f7ea170 100644 --- a/refact-agent/gui/src/hooks/useEventBusForIDE.ts +++ b/refact-agent/gui/src/hooks/useEventBusForIDE.ts @@ -47,6 +47,10 @@ export const ideEscapeKeyPressed = createAction("ide/escapeKeyPressed"); export const ideIsChatStreaming = createAction("ide/isChatStreaming"); export const ideIsChatReady = createAction("ide/isChatReady"); +export const ideSetCodeCompletionModel = createAction( + "ide/setCodeCompletionModel", +); + export const ideForceReloadFileByPath = createAction( "ide/forceReloadFileByPath", ); @@ -202,10 +206,17 @@ export const useEventsBusForIDE = () => { [postMessage], ); + const setCodeCompletionModel = useCallback( + (model: string) => { + const action = ideSetCodeCompletionModel(model); + postMessage(action); + }, + [postMessage], + ); + const [getCustomizationPath] = pathApi.useLazyCustomizationPathQuery(); const [getIntegrationsPath] = pathApi.useLazyIntegrationsPathQuery(); const [getPrivacyPath] = pathApi.useLazyPrivacyPathQuery(); - const [getBringYourOwnKeyPath] = pathApi.useLazyBringYourOwnKeyPathQuery(); // Creating a generic function to trigger different queries from RTK Query (to avoid duplicative code) const openFileFromPathQuery = useCallback( @@ -242,9 +253,6 @@ export const useEventsBusForIDE = () => { const openPrivacyFile = () => openFileFromPathQuery(getPrivacyPath); const openIntegrationsFile = () => openFileFromPathQuery(getIntegrationsPath); - const openBringYourOwnKeyFile = () => - openFileFromPathQuery(getBringYourOwnKeyPath); - const sendToolCallToIde = useCallback( (toolCall: TextDocToolCall, edit: ToolEditResult, chatId: string) => { const action = ideToolCall({ toolCall, 
edit, chatId }); @@ -264,7 +272,6 @@ export const useEventsBusForIDE = () => { queryPathThenOpenFile, openCustomizationFile, openPrivacyFile, - openBringYourOwnKeyFile, openIntegrationsFile, stopFileAnimation, startFileAnimation, @@ -274,5 +281,6 @@ export const useEventsBusForIDE = () => { setIsChatReady, setForceReloadFileByPath, sendToolCallToIde, + setCodeCompletionModel, }; }; diff --git a/refact-agent/gui/src/hooks/useEventBusForWeb.ts b/refact-agent/gui/src/hooks/useEventBusForWeb.ts index 1e3654fc4..6ac86129c 100644 --- a/refact-agent/gui/src/hooks/useEventBusForWeb.ts +++ b/refact-agent/gui/src/hooks/useEventBusForWeb.ts @@ -36,7 +36,7 @@ export function useEventBusForWeb() { } else if (host.type === "self") { setAddressURL(host.endpointAddress); setApiKey("any-will-work-for-local-server"); - } else if (host.type === "enterprise") { + } else { setAddressURL(host.endpointAddress); setApiKey(host.apiKey); } diff --git a/refact-agent/gui/src/hooks/useLinksFromLsp.ts b/refact-agent/gui/src/hooks/useLinksFromLsp.ts index 8cf0238a1..6715c2e8c 100644 --- a/refact-agent/gui/src/hooks/useLinksFromLsp.ts +++ b/refact-agent/gui/src/hooks/useLinksFromLsp.ts @@ -45,8 +45,7 @@ export function useGetLinksFromLsp() { // TODO: add the model const caps = useGetCapsQuery(); - const model = - useAppSelector(selectModel) || caps.data?.code_chat_default_model; + const model = useAppSelector(selectModel) || caps.data?.chat_default_model; const unCalledTools = React.useMemo(() => { if (messages.length === 0) return false; diff --git a/refact-agent/gui/src/hooks/useModelsQuery.ts b/refact-agent/gui/src/hooks/useModelsQuery.ts new file mode 100644 index 000000000..ff3bd73e4 --- /dev/null +++ b/refact-agent/gui/src/hooks/useModelsQuery.ts @@ -0,0 +1,38 @@ +import { modelsApi } from "../services/refact"; + +import type { GetModelArgs, GetModelDefaultsArgs } from "../services/refact"; + +export function useGetModelsByProviderNameQuery({ + providerName, +}: { + providerName: string; +}) { + return modelsApi.useGetModelsQuery({ providerName }); +} + +export function useGetModelConfiguration(args: GetModelArgs) { + return modelsApi.useGetModelQuery(args, { skip: !args.modelName }); +} + +export function useGetModelDefaults(args: GetModelDefaultsArgs) { + return modelsApi.useGetModelDefaultsQuery(args, { skip: !args.providerName }); +} + +export function useGetCompletionModelFamiliesQuery() { + return modelsApi.useGetCompletionModelFamiliesQuery(undefined); +} + +export function useGetLazyModelConfiguration() { + const [mutationTrigger] = modelsApi.useLazyGetModelQuery(); + return mutationTrigger; +} + +export function useUpdateModelMutation() { + const [mutationTrigger] = modelsApi.useUpdateModelMutation(); + return mutationTrigger; +} + +export function useDeleteModelMutation() { + const [mutationTrigger] = modelsApi.useDeleteModelMutation(); + return mutationTrigger; +} diff --git a/refact-agent/gui/src/hooks/useProvidersQuery.ts b/refact-agent/gui/src/hooks/useProvidersQuery.ts new file mode 100644 index 000000000..c1081843d --- /dev/null +++ b/refact-agent/gui/src/hooks/useProvidersQuery.ts @@ -0,0 +1,27 @@ +import { providersApi } from "../services/refact"; + +export function useGetConfiguredProvidersQuery() { + return providersApi.useGetConfiguredProvidersQuery(undefined); +} + +export function useGetProviderTemplatesQuery() { + return providersApi.useGetProviderTemplatesQuery(undefined); +} + +export function useGetProviderQuery({ + providerName, +}: { + providerName: string; +}) { + return 
providersApi.useGetProviderQuery({ providerName }); +} + +export function useUpdateProviderMutation() { + const [mutationTrigger] = providersApi.useUpdateProviderMutation(); + return mutationTrigger; +} + +export function useDeleteProviderMutation() { + const [mutationTrigger] = providersApi.useDeleteProviderMutation(); + return mutationTrigger; +} diff --git a/refact-agent/gui/src/hooks/useThinking.ts b/refact-agent/gui/src/hooks/useThinking.ts index 354133f6e..6fdd05892 100644 --- a/refact-agent/gui/src/hooks/useThinking.ts +++ b/refact-agent/gui/src/hooks/useThinking.ts @@ -24,10 +24,10 @@ export function useThinking() { const { data: userData } = useGetUser(); const supportsBoostReasoning = useMemo(() => { - const models = caps.data?.code_chat_models; + const models = caps.data?.chat_models; const item = models?.[caps.currentModel]; return item?.supports_boost_reasoning ?? false; - }, [caps.data?.code_chat_models, caps.currentModel]); + }, [caps.data?.chat_models, caps.currentModel]); const shouldBeTeasing = useMemo( () => userData?.inference === "FREE", diff --git a/refact-agent/gui/src/services/refact/caps.ts b/refact-agent/gui/src/services/refact/caps.ts index 4abcfb8a6..e97659223 100644 --- a/refact-agent/gui/src/services/refact/caps.ts +++ b/refact-agent/gui/src/services/refact/caps.ts @@ -1,6 +1,7 @@ import { RootState } from "../../app/store"; import { CAPS_URL } from "./consts"; import { createApi, fetchBaseQuery } from "@reduxjs/toolkit/query/react"; +import { CodeChatModel, CodeCompletionModel, EmbeddingModel } from "./models"; export const capsApi = createApi({ reducerPath: "caps", @@ -48,59 +49,38 @@ export const capsApi = createApi({ export const capsEndpoints = capsApi.endpoints; -export type CodeChatModel = { - default_scratchpad: string; - n_ctx: number; - similar_models: string[]; - supports_tools?: boolean | null | undefined; - supports_scratchpads: Record< - string, - { - default_system_message?: string; - } - >; - supports_multimodality?: boolean; - supports_clicks?: boolean; - // TODO: could be defined - supports_agent?: boolean; - supports_boost_reasoning?: boolean; -}; - -export type CodeCompletionModel = { - default_scratchpad: string; - n_ctx: number; - similar_models: string[]; - supports_scratchpads: Record<string, Record<string, unknown>>; - supports_tools?: boolean; - supports_multimodality?: boolean; - supports_clicks?: boolean; -}; - export type CapsResponse = { caps_version: number; cloud_name: string; - code_chat_default_model: string; + + chat_default_model: string; + chat_models: Record<string, CodeChatModel>; code_chat_default_system_prompt: string; - code_chat_models: Record<string, CodeChatModel>; - code_completion_default_model: string; + completion_models: Record<string, CodeCompletionModel>; + completion_default_model: string; code_completion_n_ctx: number; + embedding_model?: EmbeddingModel; + chat_thinking_model: string; + chat_light_model: string; + endpoint_chat_passthrough: string; endpoint_style: string; endpoint_template: string; running_models: string[]; telemetry_basic_dest: string; tokenizer_path_template: string; + telemetry_basic_retrieve_my_own: string; tokenizer_rewrite_path: Record<string, string>; support_metadata: boolean; + customization: string; }; export function isCapsResponse(json: unknown): json is CapsResponse { if (!json) return false; if (typeof json !== "object") return false; - if (!("code_chat_default_model" in json)) return false; - if (typeof json.code_chat_default_model !== "string") return false; - if (!("code_chat_models" in json)) return false; + if (!("chat_default_model" in json)) return false; +
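// NB: spot-check only; per-model shapes have their own guards in services/refact/models.ts +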
if (typeof json.chat_default_model !== "string") return false; + if (!("chat_models" in json)) return false; return true; } diff --git a/refact-agent/gui/src/services/refact/commands.ts b/refact-agent/gui/src/services/refact/commands.ts index b95944bba..ddb9aa915 100644 --- a/refact-agent/gui/src/services/refact/commands.ts +++ b/refact-agent/gui/src/services/refact/commands.ts @@ -160,6 +160,10 @@ export function isCommandCompletionResponse( export type DetailMessage = { detail: string; }; + +export type DetailMessageWithErrorType = DetailMessage & { + errorType: "CHAT" | "GLOBAL"; +}; export function isDetailMessage(json: unknown): json is DetailMessage { if (!json) return false; if (typeof json !== "object") return false; @@ -167,6 +171,16 @@ export function isDetailMessage(json: unknown): json is DetailMessage { return true; } +export function isDetailMessageWithErrorType( + json: unknown, +): json is DetailMessageWithErrorType { + if (!json) return false; + if (typeof json !== "object") return false; + if (!("detail" in json)) return false; + if (!("errorType" in json)) return false; + return true; +} + export type CommandPreviewContent = { content: string; role: "context_file" | "plain_text"; diff --git a/refact-agent/gui/src/services/refact/consts.ts b/refact-agent/gui/src/services/refact/consts.ts index 2c1ce6a49..303aa148e 100644 --- a/refact-agent/gui/src/services/refact/consts.ts +++ b/refact-agent/gui/src/services/refact/consts.ts @@ -43,3 +43,13 @@ export const KNOWLEDGE_UPDATE_URL = "/v1/mem-upd"; export const KNOWLEDGE_CREATE_URL = "/v1/trajectory-save"; export const COMPRESS_MESSAGES_URL = "/v1/trajectory-compress"; + +// Providers & Models +export const CONFIGURED_PROVIDERS_URL = "/v1/providers"; +export const PROVIDER_TEMPLATES_URL = "/v1/provider-templates"; +export const PROVIDER_URL = "/v1/provider"; + +export const MODELS_URL = "/v1/models"; +export const MODEL_URL = "/v1/model"; +export const MODEL_DEFAULTS_URL = "/v1/model-defaults"; +export const COMPLETION_MODEL_FAMILIES_URL = "/v1/completion-model-families"; diff --git a/refact-agent/gui/src/services/refact/index.ts b/refact-agent/gui/src/services/refact/index.ts index 6153995d8..92f3c4213 100644 --- a/refact-agent/gui/src/services/refact/index.ts +++ b/refact-agent/gui/src/services/refact/index.ts @@ -1,4 +1,6 @@ export * from "./caps"; +export * from "./providers"; +export * from "./models"; export * from "./chat"; export * from "./commands"; export * from "./fim"; diff --git a/refact-agent/gui/src/services/refact/models.ts b/refact-agent/gui/src/services/refact/models.ts new file mode 100644 index 000000000..91262dd3a --- /dev/null +++ b/refact-agent/gui/src/services/refact/models.ts @@ -0,0 +1,431 @@ +import { RootState } from "../../app/store"; +import { + COMPLETION_MODEL_FAMILIES_URL, + MODEL_DEFAULTS_URL, + MODEL_URL, + MODELS_URL, +} from "./consts"; +import { createApi, fetchBaseQuery } from "@reduxjs/toolkit/query/react"; +import { hasProperty } from "../../utils"; +import { isDetailMessage } from "./commands"; + +export const modelsApi = createApi({ + reducerPath: "models", + tagTypes: ["MODELS", "MODEL"], + baseQuery: fetchBaseQuery({ + prepareHeaders: (headers, { getState }) => { + const token = (getState() as RootState).config.apiKey; + if (token) { + headers.set("Authorization", `Bearer ${token}`); + } + return headers; + }, + }), + endpoints: (builder) => ({ + getModels: builder.query({ + providesTags: ["MODELS"], + queryFn: async (args, api, extraOptions, baseQuery) => { + const state = 
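/* the LSP port comes from GUI config; all model queries target the local server */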
api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${MODELS_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "GET", + url, + params: { + "provider-name": args.providerName, + }, + credentials: "same-origin", + redirect: "follow", + }); + if (result.error) { + return { error: result.error }; + } + if (!isModelsResponse(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/models", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + getModel: builder.query({ + providesTags: ["MODEL"], + queryFn: async (args, api, extraOptions, baseQuery) => { + const { modelName, modelType, providerName } = args; + + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${MODEL_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "GET", + url, + params: { + provider: providerName, + model: modelName, + type: modelType, + }, + credentials: "same-origin", + redirect: "follow", + }); + if (result.error) { + return { error: result.error }; + } + if (!isModel(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/model", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + getModelDefaults: builder.query({ + queryFn: async (args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${MODEL_DEFAULTS_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "GET", + url, + params: { + provider: args.providerName, + type: args.modelType, + }, + }); + + if (result.error) { + return { error: result.error }; + } + + if (!isModel(result.data)) { + return { + error: { + error: "Invalid response from /v1/model-defaults", + status: "CUSTOM_ERROR", + data: result.data, + }, + }; + } + + return { data: result.data }; + }, + }), + getCompletionModelFamilies: builder.query< + CompletionModelFamiliesResponse, + undefined + >({ + queryFn: async (_args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${COMPLETION_MODEL_FAMILIES_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "GET", + url, + }); + + if (result.error) { + return { error: result.error }; + } + + if (!isCompletionModelFamiliesResponse(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/completion-model-families", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + updateModel: builder.mutation({ + invalidatesTags: (_result, _error, args) => [ + { type: "MODEL", id: args.model.name }, + ], + queryFn: async (args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${MODEL_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "POST", + url, + body: { ...args }, + credentials: "same-origin", + redirect: "follow", + }); + + if (result.error) { + return { error: result.error }; + } + + // TODO: this doesn't really work: RTK Query reports FETCH_ERROR when the request fails, dropping the actual response from the LSP :/
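+ // (fetchBaseQuery puts non-2xx response bodies in result.error.data, so result.data below is only populated for 2xx replies)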
+ if (isDetailMessage(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/model", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + deleteModel: builder.mutation({ + invalidatesTags: (_result, _error, args) => [ + { type: "MODEL", id: args.model }, + ], + queryFn: async (args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${MODEL_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "DELETE", + url, + params: { ...args }, + credentials: "same-origin", + redirect: "follow", + }); + if (result.error) { + return { error: result.error }; + } + if (isDetailMessage(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/model", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + }), + refetchOnMountOrArgChange: true, +}); + +export type SimplifiedModel = { + name: string; + enabled: boolean; + removable: boolean; + user_configured: boolean; +}; + +export type ModelsResponse = { + completion_models: SimplifiedModel[]; + chat_models: SimplifiedModel[]; + embedding_model: SimplifiedModel; +}; + +export type ModelType = "embedding" | "completion" | "chat"; + +export type GetModelArgs = { + modelName: string; + providerName: string; + modelType: ModelType; +}; + +export type GetModelDefaultsArgs = Omit<GetModelArgs, "modelName">; + +export type GetModelsArgs = { + providerName: string; +}; + +export type UpdateModelRequestBody = { + provider: string; + model: Model; + type: ModelType; +}; + +export type DeleteModelRequestBody = Omit<UpdateModelRequestBody, "model"> & { + model: string; +}; + +export type SupportsReasoningStyle = "openai" | "anthropic" | "deepseek" | null; + +export type CodeChatModel = { + n_ctx: number; + name: string; + tokenizer: string; + id: string; + + supports_tools: boolean; + supports_multimodality: boolean; + supports_clicks: boolean; + supports_agent: boolean; + supports_reasoning: SupportsReasoningStyle; + supports_boost_reasoning: boolean; + default_temperature: number | null; + + enabled: boolean; + + type: "chat"; +}; + +export type CodeCompletionModel = { + n_ctx: number; + name: string; + model_family: string | null; + type: "completion"; + enabled: boolean; +}; + +export type EmbeddingModel = { + n_ctx: number; + name: string; + id: string; + tokenizer: string; + + embedding_size: number; + rejection_threshold: number; + embedding_batch: number; + + enabled: boolean; + + type: "embedding"; +}; + +export function isModelsResponse(data: unknown): data is ModelsResponse { + // Check if data is an object + if (typeof data !== "object" || data === null) return false; + + if ( + !hasProperty(data, "completion_models") || + !hasProperty(data, "chat_models") || + !hasProperty(data, "embedding_model") + ) + return false; + + return true; +} + +export type Model = CodeChatModel | CodeCompletionModel | EmbeddingModel; + +export function isCodeChatModel(data: unknown): data is CodeChatModel { + if (!data || typeof data !== "object") return false; + + if (!("n_ctx" in data) || typeof data.n_ctx !== "number") return false; + if (!("name" in data) || typeof data.name !== "string") return false; + if (!("tokenizer" in data) || typeof data.tokenizer !== "string") + return false; + + if (!("supports_tools" in data) || typeof
data.supports_tools !== "boolean") + return false; + if ( + !("supports_multimodality" in data) || + typeof data.supports_multimodality !== "boolean" + ) + return false; + if (!("supports_clicks" in data) || typeof data.supports_clicks !== "boolean") + return false; + if (!("supports_agent" in data) || typeof data.supports_agent !== "boolean") + return false; + + if (!("supports_reasoning" in data)) return false; + + if ( + !("supports_boost_reasoning" in data) || + typeof data.supports_boost_reasoning !== "boolean" + ) + return false; + + if (!("default_temperature" in data)) return false; + if ( + data.default_temperature !== null && + typeof data.default_temperature !== "number" + ) + return false; + + if (!("enabled" in data) || typeof data.enabled !== "boolean") return false; + + return true; +} + +export function isCodeCompletionModel( + data: unknown, +): data is CodeCompletionModel { + if (!data || typeof data !== "object") return false; + + if (!("n_ctx" in data) || typeof data.n_ctx !== "number") return false; + if (!("name" in data) || typeof data.name !== "string") return false; + if ( + "model_family" in data && + typeof data.model_family !== "string" && + data.model_family !== null + ) + return false; + if (!("enabled" in data) || typeof data.enabled !== "boolean") return false; + + return true; +} + +export function isEmbeddingModel(data: unknown): data is EmbeddingModel { + if (!data || typeof data !== "object") return false; + + if (!("n_ctx" in data) || typeof data.n_ctx !== "number") return false; + if (!("name" in data) || typeof data.name !== "string") return false; + if (!("tokenizer" in data) || typeof data.tokenizer !== "string") + return false; + + if (!("embedding_size" in data) || typeof data.embedding_size !== "number") + return false; + if ( + !("rejection_threshold" in data) || + typeof data.rejection_threshold !== "number" + ) + return false; + if (!("embedding_batch" in data) || typeof data.embedding_batch !== "number") + return false; + + if (!("enabled" in data) || typeof data.enabled !== "boolean") return false; + + return true; +} + +export function isModel(data: unknown): data is Model { + return ( + isCodeChatModel(data) || + isCodeCompletionModel(data) || + isEmbeddingModel(data) + ); +} + +export type CompletionModelFamiliesResponse = { model_families: string[] }; + +export function isCompletionModelFamiliesResponse( + data: unknown, +): data is CompletionModelFamiliesResponse { + if (!data || typeof data !== "object") return false; + return "model_families" in data && Array.isArray(data.model_families); +} diff --git a/refact-agent/gui/src/services/refact/path.ts b/refact-agent/gui/src/services/refact/path.ts index 3a6b3ed96..801fcf183 100644 --- a/refact-agent/gui/src/services/refact/path.ts +++ b/refact-agent/gui/src/services/refact/path.ts @@ -132,17 +132,6 @@ export const pathApi = createApi({ ); }, }), - bringYourOwnKeyPath: builder.query({ - queryFn: async (_arg, api, extraOptions, baseQuery) => { - return await fetchPath( - api, - baseQuery, - extraOptions, - CONFIG_PATH_URL, - "/bring-your-own-key.yaml", - ); - }, - }), integrationsPath: builder.query({ queryFn: async (_arg, api, extraOptions, baseQuery) => { return await fetchPath( diff --git a/refact-agent/gui/src/services/refact/providers.ts b/refact-agent/gui/src/services/refact/providers.ts new file mode 100644 index 000000000..eb9592072 --- /dev/null +++ b/refact-agent/gui/src/services/refact/providers.ts @@ -0,0 +1,362 @@ +import { RootState } from "../../app/store"; +import { 
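/* hasProperty narrows an unknown object to one carrying the given key, avoiding casts */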
hasProperty } from "../../utils"; +import { isDetailMessage } from "./commands"; +import { + CONFIGURED_PROVIDERS_URL, + PROVIDER_TEMPLATES_URL, + PROVIDER_URL, +} from "./consts"; +import { createApi, fetchBaseQuery } from "@reduxjs/toolkit/query/react"; + +export const providersApi = createApi({ + reducerPath: "providers", + tagTypes: [ + "PROVIDERS", + "TEMPLATE_PROVIDERS", + "CONFIGURED_PROVIDERS", + "PROVIDER", + ], + baseQuery: fetchBaseQuery({ + prepareHeaders: (headers, { getState }) => { + const token = (getState() as RootState).config.apiKey; + if (token) { + headers.set("Authorization", `Bearer ${token}`); + } + return headers; + }, + }), + endpoints: (builder) => ({ + getConfiguredProviders: builder.query< + ConfiguredProvidersResponse, + undefined + >({ + queryFn: async (_args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${CONFIGURED_PROVIDERS_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "GET", + url, + credentials: "same-origin", + redirect: "follow", + }); + if (result.error) { + return { error: result.error }; + } + if (!isConfiguredProvidersResponse(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/providers", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + providesTags: [{ type: "CONFIGURED_PROVIDERS", id: "LIST" }], + }), + getProviderTemplates: builder.query({ + providesTags: ["TEMPLATE_PROVIDERS"], + queryFn: async (_args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${PROVIDER_TEMPLATES_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "GET", + url, + credentials: "same-origin", + redirect: "follow", + }); + if (result.error) { + return { error: result.error }; + } + if (!isProviderTemplatesResponse(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/provider-templates", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + getProvider: builder.query({ + providesTags: ["PROVIDER"], + queryFn: async (args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${PROVIDER_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "GET", + url, + params: { + "provider-name": args.providerName, + }, + credentials: "same-origin", + redirect: "follow", + }); + + if (result.error) { + return { error: result.error }; + } + + if (!isProvider(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/provider", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + updateProvider: builder.mutation({ + invalidatesTags: (_result, _error, args) => [ + { type: "PROVIDER", id: args.name }, + ], + queryFn: async (args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${PROVIDER_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "POST", + url, + body: { ...args }, + credentials: "same-origin", + redirect: "follow", 
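/* the LSP may answer 200 with a DetailMessage body; that is treated as an error below */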
+ }); + if (result.error) { + return { error: result.error }; + } + if (isDetailMessage(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/provider", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + deleteProvider: builder.mutation({ + invalidatesTags: (_result, _error, args) => [ + { type: "PROVIDER", id: args }, + ], + queryFn: async (args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${PROVIDER_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "DELETE", + url, + params: { + "provider-name": args, + }, + credentials: "same-origin", + redirect: "follow", + }); + if (result.error) { + return { error: result.error }; + } + if (isDetailMessage(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/provider", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + }), + refetchOnMountOrArgChange: true, +}); + +export type Provider = { + name: string; + endpoint_style: "openai" | "hf"; + chat_endpoint: string; + completion_endpoint: string; + embedding_endpoint: string; + api_key: string; + + chat_default_model: string; + chat_thinking_model: string; + chat_light_model: string; + + enabled: boolean; + readonly: boolean; + supports_completion?: boolean; +}; + +export type SimplifiedProvider< + T extends keyof Provider | undefined = undefined, +> = [T] extends [undefined] + ? Partial<Provider> + : Required<Pick<Provider, T>>; + +export type ErrorLogInstance = { + path: string; + error_line: number; + error_msg: string; +}; + +export type ConfiguredProvidersResponse = { + providers: SimplifiedProvider< + "name" | "enabled" | "readonly" | "supports_completion" + >[]; + error_log: ErrorLogInstance[]; +}; + +export type ProviderTemplatesResponse = { + provider_templates: SimplifiedProvider<"name">[]; +}; + +export const providersEndpoints = providersApi.endpoints; + +export function isProvider(data: unknown): data is Provider { + if (typeof data !== "object" || data === null) return false; + + if ( + !hasProperty(data, "name") || + !hasProperty(data, "endpoint_style") || + !hasProperty(data, "chat_endpoint") || + !hasProperty(data, "completion_endpoint") || + !hasProperty(data, "embedding_endpoint") || + !hasProperty(data, "api_key") || + !hasProperty(data, "chat_default_model") || + !hasProperty(data, "chat_thinking_model") || + !hasProperty(data, "chat_light_model") || + !hasProperty(data, "enabled") + ) + return false; + + if (typeof data.name !== "string") return false; + if (data.endpoint_style !== "openai" && data.endpoint_style !== "hf") + return false; + if (typeof data.chat_endpoint !== "string") return false; + if (typeof data.completion_endpoint !== "string") return false; + if (typeof data.embedding_endpoint !== "string") return false; + if (typeof data.api_key !== "string") return false; + if (typeof data.chat_default_model !== "string") return false; + if (typeof data.chat_thinking_model !== "string") return false; + if (typeof data.chat_light_model !== "string") return false; + if (typeof data.enabled !== "boolean") return false; + + return true; +} + +export function isConfiguredProvidersResponse( + data: unknown, +): data is ConfiguredProvidersResponse { + // Check if data is an object + if (typeof data !== "object" || data === null) return false; + + if
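/* entries in error_log point at the provider file (path, error_line, error_msg) that failed to load */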
(!hasProperty(data, "providers") || !hasProperty(data, "error_log")) + return false; + + if (!Array.isArray(data.providers)) return false; + + if (!Array.isArray(data.error_log)) return false; + + for (const provider of data.providers) { + if (!isSimplifiedProvider(provider)) return false; + } + + for (const errorLog of data.error_log) { + if (!isErrorLogInstance(errorLog)) return false; + } + + return true; +} + +export function isProviderTemplatesResponse( + data: unknown, +): data is ProviderTemplatesResponse { + if (typeof data !== "object" || data === null) return false; + + if (!hasProperty(data, "provider_templates")) return false; + + if (!Array.isArray(data.provider_templates)) return false; + + for (const template of data.provider_templates) { + if (!isSimplifiedProviderWithName(template)) return false; + } + + return true; +} + +function isSimplifiedProviderWithName( + template: unknown, +): template is SimplifiedProvider<"name"> { + if (typeof template !== "object" || template === null) return false; + + if (!hasProperty(template, "name")) return false; + + return typeof template.name === "string"; +} + +function isSimplifiedProvider( + provider: unknown, +): provider is SimplifiedProvider<"name" | "enabled"> { + if (typeof provider !== "object" || provider === null) return false; + + if (!hasProperty(provider, "name") || !hasProperty(provider, "enabled")) + return false; + + if ( + hasProperty(provider, "readonly") && + typeof provider.readonly !== "boolean" + ) + return false; + + return ( + typeof provider.name === "string" && typeof provider.enabled === "boolean" + ); +} + +function isErrorLogInstance(errorLog: unknown): errorLog is ErrorLogInstance { + if (typeof errorLog !== "object" || errorLog === null) return false; + + if ( + !hasProperty(errorLog, "path") || + !hasProperty(errorLog, "error_line") || + !hasProperty(errorLog, "error_msg") + ) + return false; + + return ( + typeof errorLog.path === "string" && + typeof errorLog.error_line === "number" && + typeof errorLog.error_msg === "string" + ); +} diff --git a/refact-agent/gui/src/utils/hasProperty.ts b/refact-agent/gui/src/utils/hasProperty.ts new file mode 100644 index 000000000..1e205b4d8 --- /dev/null +++ b/refact-agent/gui/src/utils/hasProperty.ts @@ -0,0 +1,6 @@ +export function hasProperty<T extends PropertyKey>( + obj: object, + prop: T, +): obj is { [K in T]: unknown } { + return prop in obj; +} diff --git a/refact-agent/gui/src/utils/index.ts b/refact-agent/gui/src/utils/index.ts index e61b563f8..825c6c6ae 100644 --- a/refact-agent/gui/src/utils/index.ts +++ b/refact-agent/gui/src/utils/index.ts @@ -10,3 +10,4 @@ export * from "./partition"; export * from "./fencedBackticks"; export * from "./isAbsolutePath"; export * from "./isDetailMessage"; +export * from "./hasProperty"; diff --git a/refact-server/Dockerfile.base b/refact-server/Dockerfile.base index ee74375c6..62d73e0f7 100644 --- a/refact-server/Dockerfile.base +++ b/refact-server/Dockerfile.base @@ -34,6 +34,8 @@ RUN pip install ninja RUN pip install packaging==24.1 setuptools==70.0.0 setuptools-scm==8.1.0 ENV CMAKE_ARGS="-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=60;61;70;75;80;86;89;90+PTX" RUN pip install -v --no-build-isolation git+https://github.com/smallcloudai/vllm@refact_v0.7.3 +# Or, if the Refact-specific patches in the fork are not required, pull the vLLM GPU wheel directly from PyPI instead of building from source: +# RUN pip install --no-cache-dir vllm==0.7.3 # there is no prebuilt auto-gptq with torch 2.5.0 support ENV TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX"